From 9d8397be110cfc3c51837025ed3767d420c4b723 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Mon, 29 Dec 2025 15:51:29 -0500 Subject: [PATCH] add CDNA3+RDNA4 support (#13882) * fix CI * remove junk * rename lib to dsl * correct * cleanups --- .github/workflows/test.yml | 19 +- CLAUDE.md | 8 +- extra/assembly/amd/asm.py | 2 +- .../amd/autogen/{cdna4 => cdna}/__init__.py | 45 +- extra/assembly/amd/autogen/cdna/gen_pcode.py | 13131 ++++++++++++++++ extra/assembly/amd/autogen/cdna4/gen_pcode.py | 1630 -- extra/assembly/amd/autogen/rdna3/__init__.py | 4 +- extra/assembly/amd/autogen/rdna4/__init__.py | 3051 ++++ extra/assembly/amd/autogen/rdna4/gen_pcode.py | 13053 +++++++++++++++ extra/assembly/amd/{lib.py => dsl.py} | 120 +- extra/assembly/amd/emu.py | 2 +- extra/assembly/amd/pcode.py | 111 +- .../amd/test/external_test_usability.py | 2 +- extra/assembly/amd/test/test_emu.py | 2 +- extra/assembly/amd/test/test_formats.py | 8 +- extra/assembly/amd/test/test_handwritten.py | 2 +- extra/assembly/amd/test/test_pdf_parser.py | 2 +- extra/assembly/amd/test/test_roundtrip.py | 2 +- 18 files changed, 29464 insertions(+), 1730 deletions(-) rename extra/assembly/amd/autogen/{cdna4 => cdna}/__init__.py (99%) create mode 100644 extra/assembly/amd/autogen/cdna/gen_pcode.py delete mode 100644 extra/assembly/amd/autogen/cdna4/gen_pcode.py create mode 100644 extra/assembly/amd/autogen/rdna4/__init__.py create mode 100644 extra/assembly/amd/autogen/rdna4/gen_pcode.py rename extra/assembly/amd/{lib.py => dsl.py} (84%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ef528e6e72..b77fd7068e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -679,22 +679,11 @@ jobs: run: python -m pytest -n=auto extra/assembly/amd/ --durations 20 - name: Install pdfplumber run: pip install pdfplumber - - name: Verify RDNA3 autogen is up to date + - name: Verify AMD autogen is up to date run: | - python -m extra.assembly.amd.lib --arch rdna3 - git diff --exit-code extra/assembly/amd/autogen/rdna3/__init__.py - - name: Verify CDNA4 autogen is up to date - run: | - python -m extra.assembly.amd.lib --arch cdna4 - git diff --exit-code extra/assembly/amd/autogen/cdna4/__init__.py - - name: Verify RDNA3 pcode autogen is up to date - run: | - python -m extra.assembly.amd.pcode --arch rdna3 - git diff --exit-code extra/assembly/amd/autogen/rdna3/gen_pcode.py - - name: Verify CDNA4 pcode autogen is up to date - run: | - python -m extra.assembly.amd.pcode --arch cdna4 - git diff --exit-code extra/assembly/amd/autogen/cdna4/gen_pcode.py + python -m extra.assembly.amd.dsl --arch all + python -m extra.assembly.amd.pcode --arch all + git diff --exit-code extra/assembly/amd/autogen/ testnvidia: strategy: diff --git a/CLAUDE.md b/CLAUDE.md index 145841e2e7..ac8ed615a3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -76,10 +76,10 @@ VIZ=1 python -c "from tinygrad import Tensor; Tensor.ones(10).sum().realize()" ## Auto-generated Files (DO NOT EDIT) The following files are auto-generated and should never be edited manually: -- `extra/assembly/amd/autogen/rdna3/__init__.py` - Generated by `python -m extra.assembly.amd.lib --arch rdna3` -- `extra/assembly/amd/autogen/rdna3/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch rdna3` -- `extra/assembly/amd/autogen/cdna4/__init__.py` - Generated by `python -m extra.assembly.amd.lib --arch cdna4` -- `extra/assembly/amd/autogen/cdna4/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode 
--arch cdna4` +- `extra/assembly/amd/autogen/{arch}/__init__.py` - Generated by `python -m extra.assembly.amd.dsl --arch {arch}` +- `extra/assembly/amd/autogen/{arch}/gen_pcode.py` - Generated by `python -m extra.assembly.amd.pcode --arch {arch}` + +Where `{arch}` is one of: `rdna3`, `rdna4`, `cdna` To add missing instruction implementations, add them to `extra/assembly/amd/emu.py` instead. diff --git a/extra/assembly/amd/asm.py b/extra/assembly/amd/asm.py index 50a382d0f8..7fcef5e64e 100644 --- a/extra/assembly/amd/asm.py +++ b/extra/assembly/amd/asm.py @@ -1,7 +1,7 @@ # RDNA3 assembler and disassembler from __future__ import annotations import re -from extra.assembly.amd.lib import Inst, RawImm, Reg, SGPR, VGPR, TTMP, s, v, ttmp, _RegFactory, FLOAT_ENC, SRC_FIELDS, unwrap +from extra.assembly.amd.dsl import Inst, RawImm, Reg, SGPR, VGPR, TTMP, s, v, ttmp, _RegFactory, FLOAT_ENC, SRC_FIELDS, unwrap # Decoding helpers SPECIAL_GPRS = {106: "vcc_lo", 107: "vcc_hi", 124: "null", 125: "m0", 126: "exec_lo", 127: "exec_hi", 253: "scc"} diff --git a/extra/assembly/amd/autogen/cdna4/__init__.py b/extra/assembly/amd/autogen/cdna/__init__.py similarity index 99% rename from extra/assembly/amd/autogen/cdna4/__init__.py rename to extra/assembly/amd/autogen/cdna/__init__.py index 568878d989..c1c1ecaaad 100644 --- a/extra/assembly/amd/autogen/cdna4/__init__.py +++ b/extra/assembly/amd/autogen/cdna/__init__.py @@ -1,7 +1,7 @@ -# autogenerated from AMD CDNA4 ISA PDF by lib.py - do not edit +# autogenerated from AMD CDNA3+CDNA4 ISA PDF by dsl.py - do not edit from enum import IntEnum from typing import Annotated -from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField +from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField import functools class SrcEnum(IntEnum): @@ -158,6 +158,12 @@ class DSOp(IntEnum): DS_READ2ST64_B64 = 120 DS_ADD_RTN_F64 = 124 DS_CONDXCHG32_RTN_B64 = 126 + DS_GWS_SEMA_RELEASE_ALL = 152 + DS_GWS_INIT = 153 + DS_GWS_SEMA_V = 154 + DS_GWS_SEMA_BR = 155 + DS_GWS_SEMA_P = 156 + DS_GWS_BARRIER = 157 DS_READ_ADDTID_B32 = 182 DS_PK_ADD_RTN_F16 = 183 DS_PK_ADD_RTN_BF16 = 184 @@ -1385,6 +1391,8 @@ class VOP3POp(IntEnum): V_SMFMAC_F32_16X16X128_BF8_BF8 = 59 V_SMFMAC_F32_16X16X128_BF8_FP8 = 60 V_SMFMAC_F32_16X16X128_FP8_BF8 = 61 + V_MFMA_F32_16X16X8_XF32 = 62 + V_MFMA_F32_32X32X4_XF32 = 63 V_MFMA_F32_32X32X1_2B_F32 = 64 V_MFMA_F32_16X16X1_4B_F32 = 65 V_MFMA_F32_4X4X1_16B_F32 = 66 @@ -1648,6 +1656,11 @@ class VOPCOp(IntEnum): # instruction formats class DPP(Inst64): encoding = bits[31:26] == 0b110110 + src1_sel = bits[58:56] + src1_sext = bits[59] + src1_neg = bits[60] + src1_abs = bits[61] + s1 = bits[63] offset0 = bits[7:0] offset1 = bits[15:8] op = bits[24:17] @@ -1667,6 +1680,7 @@ class DS(Inst64): data1:VGPRField = bits[55:48] offset0 = bits[7:0] offset1 = bits[15:8] + gds = bits[16] acc = bits[25] class FLAT(Inst64): @@ -1694,10 +1708,10 @@ class MTBUF(Inst64): offset:Imm = bits[11:0] offen = bits[12] idxen = bits[13] - sc0 = bits[14] sc1 = bits[53] nt = bits[54] acc = bits[55] + sc0 = bits[14] class MUBUF(Inst64): encoding = bits[31:26] == 0b111000 @@ -1735,6 +1749,23 @@ class SDWA(Inst64): sd = bits[47] row_mask = bits[63:60] +class SDWAB(Inst64): + src0:Src = bits[39:32] + dst_sel = bits[42:40] + dst_u = bits[44:43] + clmp = bits[45] + omod = 
bits[47:46] + src0_sel = bits[50:48] + src0_sext = bits[51] + src0_neg = bits[52] + src0_abs = bits[53] + s0 = bits[55] + src1_sel = bits[58:56] + src1_sext = bits[59] + src1_neg = bits[60] + src1_abs = bits[61] + s1 = bits[63] + class SMEM(Inst64): encoding = bits[31:26] == 0b110000 op:Annotated[BitField, SMEMOp] = bits[25:18] @@ -1950,6 +1981,12 @@ ds_read2_b64 = functools.partial(DS, DSOp.DS_READ2_B64) ds_read2st64_b64 = functools.partial(DS, DSOp.DS_READ2ST64_B64) ds_add_rtn_f64 = functools.partial(DS, DSOp.DS_ADD_RTN_F64) ds_condxchg32_rtn_b64 = functools.partial(DS, DSOp.DS_CONDXCHG32_RTN_B64) +ds_gws_sema_release_all = functools.partial(DS, DSOp.DS_GWS_SEMA_RELEASE_ALL) +ds_gws_init = functools.partial(DS, DSOp.DS_GWS_INIT) +ds_gws_sema_v = functools.partial(DS, DSOp.DS_GWS_SEMA_V) +ds_gws_sema_br = functools.partial(DS, DSOp.DS_GWS_SEMA_BR) +ds_gws_sema_p = functools.partial(DS, DSOp.DS_GWS_SEMA_P) +ds_gws_barrier = functools.partial(DS, DSOp.DS_GWS_BARRIER) ds_read_addtid_b32 = functools.partial(DS, DSOp.DS_READ_ADDTID_B32) ds_pk_add_rtn_f16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_F16) ds_pk_add_rtn_bf16 = functools.partial(DS, DSOp.DS_PK_ADD_RTN_BF16) @@ -3145,6 +3182,8 @@ v_smfmac_i32_16x16x128_i8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_I32_16X16X v_smfmac_f32_16x16x128_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_BF8) v_smfmac_f32_16x16x128_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_BF8_FP8) v_smfmac_f32_16x16x128_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SMFMAC_F32_16X16X128_FP8_BF8) +v_mfma_f32_16x16x8_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X8_XF32) +v_mfma_f32_32x32x4_xf32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X4_XF32) v_mfma_f32_32x32x1_2b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_32X32X1_2B_F32) v_mfma_f32_16x16x1_4b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_16X16X1_4B_F32) v_mfma_f32_4x4x1_16b_f32 = functools.partial(VOP3P, VOP3POp.V_MFMA_F32_4X4X1_16B_F32) diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py new file mode 100644 index 0000000000..ae7ea3e029 --- /dev/null +++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py @@ -0,0 +1,13131 @@ +# autogenerated by pcode.py - do not edit +# to regenerate: python -m extra.assembly.amd.pcode --arch cdna +# ruff: noqa: E501,F405,F403 +# mypy: ignore-errors +from extra.assembly.amd.autogen.cdna import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3POp, VOPCOp, VOP3AOp, VOP3BOp +from extra.assembly.amd.pcode import * + +def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b32 = S0.b32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b32 = S0.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b64 = S0.b64 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b64 = S0.b64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if SCC then + # D0.b32 = S0.b32 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + if SCC: + D0.b32 = S0.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val 
& 1} + return result + +def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if SCC then + # D0.b64 = S0.b64 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + if SCC: + D0.b64 = S0.b64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~S0.u32; + # SCC = D0.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~S0.u32 + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~S0.u64; + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~S0.u64 + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32[31 : 0] = S0.u32[0 : 31] + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32[31 : 0] = S0.u32[0 : 31] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[63 : 0] = S0.u64[0 : 63] + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64[63 : 0] = S0.u64[0 : 63] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 31 do + # tmp += S0.u32[i] == 1'0U ? 1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(31)+1): + tmp += ((1) if (S0.u32[i] == 0) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 63 do + # tmp += S0.u64[i] == 1'0U ? 1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(63)+1): + tmp += ((1) if (S0.u64[i] == 0) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 31 do + # tmp += S0.u32[i] == 1'1U ? 
1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(31)+1): + tmp += ((1) if (S0.u32[i] == 1) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 63 do + # tmp += S0.u64[i] == 1'1U ? 1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(63)+1): + tmp += ((1) if (S0.u64[i] == 1) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if no zeros are found + # for i in 0 : 31 do + # // Search from LSB + # if S0.u32[i] == 1'0U then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(-1) + for i in range(0, int(31)+1): + if S0.u32[i] == 0: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if no zeros are found + # for i in 0 : 63 do + # // Search from LSB + # if S0.u64[i] == 1'0U then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(-1) + for i in range(0, int(63)+1): + if S0.u64[i] == 0: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from LSB + # if S0.u32[i] == 1'1U then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(-1) + for i in range(0, int(31)+1): + if S0.u32[i] == 1: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if no ones are found + # for i in 0 : 63 do + # // Search from LSB + # if S0.u64[i] == 1'1U then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(-1) + for i in range(0, int(63)+1): + if S0.u64[i] == 1: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from MSB + # if S0.u32[31 - i] == 1'1U then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled 
pseudocode --- + tmp = Reg(-1) + for i in range(0, int(31)+1): + if S0.u32[31 - i] == 1: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if no ones are found + # for i in 0 : 63 do + # // Search from MSB + # if S0.u64[63 - i] == 1'1U then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(-1) + for i in range(0, int(63)+1): + if S0.u64[63 - i] == 1: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if all bits are the same + # for i in 1 : 31 do + # // Search from MSB + # if S0.u32[31 - i] != S0.u32[31] then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(-1) + for i in range(1, int(31)+1): + if S0.u32[31 - i] != S0.u32[31]: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = -1; + # // Set if all bits are the same + # for i in 1 : 63 do + # // Search from MSB + # if S0.u64[63 - i] != S0.u64[63] then + # tmp = i; + # endif + # endfor; + # D0.i32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(-1) + for i in range(1, int(63)+1): + if S0.u64[63 - i] != S0.u64[63]: + tmp = Reg(i) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i8)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i8)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i16)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32[S0.u32[4 : 0]] = 1'0U + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32[S0.u32[4 : 0]] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[S0.u32[5 : 0]] = 1'0U + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64[S0.u32[5 : 0]] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32[S0.u32[4 : 0]] = 1'1U + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32[S0.u32[4 : 0]] = 1 + # --- end pseudocode --- + 
result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[S0.u32[5 : 0]] = 1'1U + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64[S0.u32[5 : 0]] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 & EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 & EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set + # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 | EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 | EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 ^ EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 ^ EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into + # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 & ~EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + 
SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 & ~EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the + # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 | ~EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 | ~EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = ~(S0.u64 & EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = ~(S0.u64 & EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = ~(S0.u64 | EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = ~(S0.u64 | EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = 
Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = ~(S0.u64 ^ EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; + # SCC = D0.i32 != 0 + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # M0[7 : 0] = S0.u32[7 : 0].b8 + S0 = Reg(s0) + # --- compiled pseudocode --- + M0[7 : 0] = S0.u32[7 : 0].b8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into + # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into + # saveexec = EXEC.u64; + # EXEC.u64 = (~S0.u64 & EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (~S0.u64 & EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the + # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the + # saveexec = EXEC.u64; + # EXEC.u64 = (~S0.u64 | EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (~S0.u64 | EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into + # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op + # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is + # EXEC.u64 = (~S0.u64 & EXEC.u64); + # D0.u64 = EXEC.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + EXEC.u64 = (~S0.u64 & EXEC.u64) + D0.u64 = EXEC.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into + # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op + # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is + # EXEC.u64 = (S0.u64 & ~EXEC.u64); + # D0.u64 = EXEC.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + EXEC.u64 = (S0.u64 & ~EXEC.u64) + D0.u64 = EXEC.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32; + # for i in 0 : 31 do + # D0.u64[i * 2] = tmp[i]; + # D0.u64[i * 2 + 1] = tmp[i] + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.u32) + for i in range(0, int(31)+1): + D0.u64[i * 2] = tmp[i] + D0.u64[i * 2 + 1] = tmp[i] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +SOP1Op_FUNCTIONS = { + SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, + SOP1Op.S_MOV_B64: _SOP1Op_S_MOV_B64, + SOP1Op.S_CMOV_B32: _SOP1Op_S_CMOV_B32, + SOP1Op.S_CMOV_B64: _SOP1Op_S_CMOV_B64, + SOP1Op.S_NOT_B32: _SOP1Op_S_NOT_B32, + SOP1Op.S_NOT_B64: _SOP1Op_S_NOT_B64, + SOP1Op.S_BREV_B32: _SOP1Op_S_BREV_B32, + SOP1Op.S_BREV_B64: _SOP1Op_S_BREV_B64, + SOP1Op.S_BCNT0_I32_B32: _SOP1Op_S_BCNT0_I32_B32, + SOP1Op.S_BCNT0_I32_B64: _SOP1Op_S_BCNT0_I32_B64, + SOP1Op.S_BCNT1_I32_B32: _SOP1Op_S_BCNT1_I32_B32, + SOP1Op.S_BCNT1_I32_B64: _SOP1Op_S_BCNT1_I32_B64, + SOP1Op.S_FF0_I32_B32: _SOP1Op_S_FF0_I32_B32, + SOP1Op.S_FF0_I32_B64: _SOP1Op_S_FF0_I32_B64, + SOP1Op.S_FF1_I32_B32: _SOP1Op_S_FF1_I32_B32, + SOP1Op.S_FF1_I32_B64: _SOP1Op_S_FF1_I32_B64, + SOP1Op.S_FLBIT_I32_B32: _SOP1Op_S_FLBIT_I32_B32, + SOP1Op.S_FLBIT_I32_B64: _SOP1Op_S_FLBIT_I32_B64, + SOP1Op.S_FLBIT_I32: _SOP1Op_S_FLBIT_I32, + SOP1Op.S_FLBIT_I32_I64: _SOP1Op_S_FLBIT_I32_I64, + SOP1Op.S_SEXT_I32_I8: _SOP1Op_S_SEXT_I32_I8, + SOP1Op.S_SEXT_I32_I16: _SOP1Op_S_SEXT_I32_I16, + SOP1Op.S_BITSET0_B32: _SOP1Op_S_BITSET0_B32, + SOP1Op.S_BITSET0_B64: _SOP1Op_S_BITSET0_B64, + SOP1Op.S_BITSET1_B32: _SOP1Op_S_BITSET1_B32, + SOP1Op.S_BITSET1_B64: _SOP1Op_S_BITSET1_B64, + SOP1Op.S_AND_SAVEEXEC_B64: _SOP1Op_S_AND_SAVEEXEC_B64, + SOP1Op.S_OR_SAVEEXEC_B64: _SOP1Op_S_OR_SAVEEXEC_B64, + SOP1Op.S_XOR_SAVEEXEC_B64: _SOP1Op_S_XOR_SAVEEXEC_B64, + SOP1Op.S_ANDN2_SAVEEXEC_B64: _SOP1Op_S_ANDN2_SAVEEXEC_B64, + SOP1Op.S_ORN2_SAVEEXEC_B64: _SOP1Op_S_ORN2_SAVEEXEC_B64, + SOP1Op.S_NAND_SAVEEXEC_B64: _SOP1Op_S_NAND_SAVEEXEC_B64, + SOP1Op.S_NOR_SAVEEXEC_B64: _SOP1Op_S_NOR_SAVEEXEC_B64, + 
SOP1Op.S_XNOR_SAVEEXEC_B64: _SOP1Op_S_XNOR_SAVEEXEC_B64, + SOP1Op.S_ABS_I32: _SOP1Op_S_ABS_I32, + SOP1Op.S_SET_GPR_IDX_IDX: _SOP1Op_S_SET_GPR_IDX_IDX, + SOP1Op.S_ANDN1_SAVEEXEC_B64: _SOP1Op_S_ANDN1_SAVEEXEC_B64, + SOP1Op.S_ORN1_SAVEEXEC_B64: _SOP1Op_S_ORN1_SAVEEXEC_B64, + SOP1Op.S_ANDN1_WREXEC_B64: _SOP1Op_S_ANDN1_WREXEC_B64, + SOP1Op.S_ANDN2_WREXEC_B64: _SOP1Op_S_ANDN2_WREXEC_B64, + SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32, +} + +def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32; + # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32) + SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.i32 + S1.i32; + # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); + # D0.i32 = tmp.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.i32 + S1.i32) + SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) + D0.i32 = tmp.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.i32 - S1.i32; + # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); + # D0.i32 = tmp.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.i32 - S1.i32) + SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) + D0.i32 = tmp.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32 - SCC.u32; + # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 
1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32 - SCC.u32) + SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 < S1.i32; + # D0.i32 = SCC ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 < S1.i32) + D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 < S1.u32; + # D0.u32 = SCC ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 < S1.u32) + D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 >= S1.i32; + # D0.i32 = SCC ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 >= S1.i32) + D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 >= S1.u32; + # D0.u32 = SCC ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 >= S1.u32) + D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = SCC ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = SCC ? 
S0.u64 : S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 & S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 & S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 | S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 | S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 ^ S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 ^ S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & ~S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & ~S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 & ~S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 & ~S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end 
pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | ~S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | ~S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 | ~S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 | ~S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 & S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 & S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~(S0.u64 & S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~(S0.u64 & S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 | S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 | S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~(S0.u64 | S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~(S0.u64 | S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 ^ S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 ^ S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~(S0.u64 ^ S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~(S0.u64 ^ S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 << S1[4 : 0].u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 << S1[4 : 0].u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 << S1[5 : 0].u32); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 << S1[5 : 0].u32) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 >> S1[4 : 0].u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 >> S1[4 : 0].u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 >> S1[5 : 0].u32); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 >> S1[5 : 0].u32) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); + # SCC = D0.i32 != 0 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); + # SCC = D0.i64 != 0LL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) + SCC = Reg(D0.i64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): + # D0.i32 = S0.i32 * S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = S0.i32 * S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); + # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); + # SCC = D0.i32 != 0 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); + # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); + # SCC = D0.i64 != 0LL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) + SCC = Reg(D0.i64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 - S1.i32; + # if D0.i32 < 0 then + # D0.i32 = -D0.i32 + # endif; + # SCC = D0.i32 != 0 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i32 = S0.i32 - S1.i32 + if D0.i32 < 0: + D0.i32 = -D0.i32 + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u32) * (S1.u32)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i32) * 
64'I(S1.i32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (((S0.i32) * (S1.i32)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 1) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 2) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 3) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 4) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +SOP2Op_FUNCTIONS = { + SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32, + SOP2Op.S_SUB_U32: _SOP2Op_S_SUB_U32, + SOP2Op.S_ADD_I32: _SOP2Op_S_ADD_I32, + SOP2Op.S_SUB_I32: _SOP2Op_S_SUB_I32, + SOP2Op.S_ADDC_U32: _SOP2Op_S_ADDC_U32, + SOP2Op.S_SUBB_U32: _SOP2Op_S_SUBB_U32, + SOP2Op.S_MIN_I32: _SOP2Op_S_MIN_I32, + SOP2Op.S_MIN_U32: _SOP2Op_S_MIN_U32, + SOP2Op.S_MAX_I32: _SOP2Op_S_MAX_I32, + SOP2Op.S_MAX_U32: _SOP2Op_S_MAX_U32, + SOP2Op.S_CSELECT_B32: _SOP2Op_S_CSELECT_B32, + SOP2Op.S_CSELECT_B64: _SOP2Op_S_CSELECT_B64, + SOP2Op.S_AND_B32: _SOP2Op_S_AND_B32, + SOP2Op.S_AND_B64: _SOP2Op_S_AND_B64, + SOP2Op.S_OR_B32: _SOP2Op_S_OR_B32, + SOP2Op.S_OR_B64: _SOP2Op_S_OR_B64, + SOP2Op.S_XOR_B32: _SOP2Op_S_XOR_B32, + SOP2Op.S_XOR_B64: _SOP2Op_S_XOR_B64, + SOP2Op.S_ANDN2_B32: _SOP2Op_S_ANDN2_B32, + SOP2Op.S_ANDN2_B64: _SOP2Op_S_ANDN2_B64, + SOP2Op.S_ORN2_B32: _SOP2Op_S_ORN2_B32, + SOP2Op.S_ORN2_B64: _SOP2Op_S_ORN2_B64, + SOP2Op.S_NAND_B32: _SOP2Op_S_NAND_B32, + SOP2Op.S_NAND_B64: _SOP2Op_S_NAND_B64, + SOP2Op.S_NOR_B32: _SOP2Op_S_NOR_B32, + SOP2Op.S_NOR_B64: _SOP2Op_S_NOR_B64, + SOP2Op.S_XNOR_B32: _SOP2Op_S_XNOR_B32, + SOP2Op.S_XNOR_B64: _SOP2Op_S_XNOR_B64, + SOP2Op.S_LSHL_B32: _SOP2Op_S_LSHL_B32, + SOP2Op.S_LSHL_B64: _SOP2Op_S_LSHL_B64, + SOP2Op.S_LSHR_B32: _SOP2Op_S_LSHR_B32, + SOP2Op.S_LSHR_B64: _SOP2Op_S_LSHR_B64, + SOP2Op.S_ASHR_I32: _SOP2Op_S_ASHR_I32, + SOP2Op.S_ASHR_I64: _SOP2Op_S_ASHR_I64, + SOP2Op.S_BFM_B32: _SOP2Op_S_BFM_B32, + SOP2Op.S_BFM_B64: _SOP2Op_S_BFM_B64, + SOP2Op.S_MUL_I32: _SOP2Op_S_MUL_I32, + SOP2Op.S_BFE_U32: _SOP2Op_S_BFE_U32, + SOP2Op.S_BFE_I32: _SOP2Op_S_BFE_I32, + SOP2Op.S_BFE_U64: _SOP2Op_S_BFE_U64, + SOP2Op.S_BFE_I64: _SOP2Op_S_BFE_I64, + SOP2Op.S_ABSDIFF_I32: _SOP2Op_S_ABSDIFF_I32, + SOP2Op.S_MUL_HI_U32: _SOP2Op_S_MUL_HI_U32, + SOP2Op.S_MUL_HI_I32: _SOP2Op_S_MUL_HI_I32, + SOP2Op.S_LSHL1_ADD_U32: _SOP2Op_S_LSHL1_ADD_U32, + SOP2Op.S_LSHL2_ADD_U32: _SOP2Op_S_LSHL2_ADD_U32, + SOP2Op.S_LSHL3_ADD_U32: _SOP2Op_S_LSHL3_ADD_U32, + SOP2Op.S_LSHL4_ADD_U32: _SOP2Op_S_LSHL4_ADD_U32, + SOP2Op.S_PACK_LL_B32_B16: 
_SOP2Op_S_PACK_LL_B32_B16, + SOP2Op.S_PACK_LH_B32_B16: _SOP2Op_S_PACK_LH_B32_B16, + SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, +} + +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 == S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 == S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 <> S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 != S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 > S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 > S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 >= S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 >= S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 < S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 < S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 <= S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 <= S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 == S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 == S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 <> S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 != S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 > S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 > S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 >= S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 >= S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def 
_SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 < S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 < S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 <= S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 <= S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VSKIP = S0.u32[S1.u32[4 : 0]] + S0 = Reg(s0) + S1 = Reg(s1) + # --- compiled pseudocode --- + VSKIP = S0.u32[S1.u32[4 : 0]] + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = + # VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. + # M0[7 : 0] = S0.u32[7 : 0].b8; + # // this is the direct content of raw S1 field + S0 = Reg(s0) + S1 = Reg(s1) + SRC0 = Reg(src0_idx) + VDST = Reg(vdst_idx) + # --- compiled pseudocode --- + # specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = + # VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. 
+ M0[7 : 0] = S0.u32[7 : 0].b8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64 == S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64 == S1.u64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64 <> S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64 != S1.u64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +SOPCOp_FUNCTIONS = { + SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, + SOPCOp.S_CMP_LG_I32: _SOPCOp_S_CMP_LG_I32, + SOPCOp.S_CMP_GT_I32: _SOPCOp_S_CMP_GT_I32, + SOPCOp.S_CMP_GE_I32: _SOPCOp_S_CMP_GE_I32, + SOPCOp.S_CMP_LT_I32: _SOPCOp_S_CMP_LT_I32, + SOPCOp.S_CMP_LE_I32: _SOPCOp_S_CMP_LE_I32, + SOPCOp.S_CMP_EQ_U32: _SOPCOp_S_CMP_EQ_U32, + SOPCOp.S_CMP_LG_U32: _SOPCOp_S_CMP_LG_U32, + SOPCOp.S_CMP_GT_U32: _SOPCOp_S_CMP_GT_U32, + SOPCOp.S_CMP_GE_U32: _SOPCOp_S_CMP_GE_U32, + SOPCOp.S_CMP_LT_U32: _SOPCOp_S_CMP_LT_U32, + SOPCOp.S_CMP_LE_U32: _SOPCOp_S_CMP_LE_U32, + SOPCOp.S_BITCMP0_B32: _SOPCOp_S_BITCMP0_B32, + SOPCOp.S_BITCMP1_B32: _SOPCOp_S_BITCMP1_B32, + SOPCOp.S_BITCMP0_B64: _SOPCOp_S_BITCMP0_B64, + SOPCOp.S_BITCMP1_B64: _SOPCOp_S_BITCMP1_B64, + SOPCOp.S_SETVSKIP: _SOPCOp_S_SETVSKIP, + SOPCOp.S_SET_GPR_IDX_ON: _SOPCOp_S_SET_GPR_IDX_ON, + SOPCOp.S_CMP_EQ_U64: _SOPCOp_S_CMP_EQ_U64, + SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64, +} + +def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i16)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if SCC then + # D0.i32 = 32'I(signext(S0.i16)) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + if SCC: + D0.i32 = (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 == 32'I(signext(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 == (signext(S1.i16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 != 32'I(signext(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 != (signext(S1.i16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 > 32'I(signext(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 > (signext(S1.i16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def 
_SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 >= 32'I(signext(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 >= (signext(S1.i16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 < 32'I(signext(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 < (signext(S1.i16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 <= 32'I(signext(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 <= (signext(S1.i16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 == 32'U(S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 == (S1.u16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 != 32'U(S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 != (S1.u16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 > 32'U(S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 > (S1.u16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 >= 32'U(S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 >= (S1.u16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 < 32'U(S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 < (S1.u16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 <= 32'U(S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 <= (S1.u16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); + # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + D0.i32 = 
D0.i32 + (signext(S0.i16)) + SCC = Reg(((tmp[31] == S0.i16[15]) and (tmp[31] != D0.i32[31]))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = D0.i32 * (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +SOPKOp_FUNCTIONS = { + SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, + SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32, + SOPKOp.S_CMPK_EQ_I32: _SOPKOp_S_CMPK_EQ_I32, + SOPKOp.S_CMPK_LG_I32: _SOPKOp_S_CMPK_LG_I32, + SOPKOp.S_CMPK_GT_I32: _SOPKOp_S_CMPK_GT_I32, + SOPKOp.S_CMPK_GE_I32: _SOPKOp_S_CMPK_GE_I32, + SOPKOp.S_CMPK_LT_I32: _SOPKOp_S_CMPK_LT_I32, + SOPKOp.S_CMPK_LE_I32: _SOPKOp_S_CMPK_LE_I32, + SOPKOp.S_CMPK_EQ_U32: _SOPKOp_S_CMPK_EQ_U32, + SOPKOp.S_CMPK_LG_U32: _SOPKOp_S_CMPK_LG_U32, + SOPKOp.S_CMPK_GT_U32: _SOPKOp_S_CMPK_GT_U32, + SOPKOp.S_CMPK_GE_U32: _SOPKOp_S_CMPK_GE_U32, + SOPKOp.S_CMPK_LT_U32: _SOPKOp_S_CMPK_LT_U32, + SOPKOp.S_CMPK_LE_U32: _SOPKOp_S_CMPK_LE_U32, + SOPKOp.S_ADDK_I32: _SOPKOp_S_ADDK_I32, + SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32, +} + +def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # for i in 0U : SIMM16.u16[3 : 0].u32 do + # endfor + SIMM16 = Reg(literal) + # --- compiled pseudocode --- + for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): + pass + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // PC passed into trap handler points to S_TRAP itself, + # // trap base address + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Get Doorbell ID 10 - Returns doorbell into EXEC, with the doorbell physical address in bits + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +SOPPOp_FUNCTIONS = { + SOPPOp.S_NOP: _SOPPOp_S_NOP, + SOPPOp.S_TRAP: _SOPPOp_S_TRAP, + SOPPOp.S_SET_GPR_IDX_MODE: _SOPPOp_S_SET_GPR_IDX_MODE, +} + +def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b32 = S0.b32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b32 = S0.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare lane : 32'I; + # if EXEC == 0x0LL then + # lane = 0; + # // Force lane 0 if all lanes are disabled + # else + # lane = s_ff1_i32_b64(EXEC); + # // Lowest active lane + # endif; + # D0.b32 = VGPR[lane][SRC0.u32] + D0 = Reg(d0) + EXEC = Reg(exec_mask) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if EXEC == 0x0: + lane = 0 + else: + lane = s_ff1_i32_b64(EXEC) + D0.b32 = VGPR[lane][SRC0.u32] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f64_to_i32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f64_to_i32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = i32_to_f64(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = i32_to_f64(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = i32_to_f32(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = i32_to_f32(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f32_to_u32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f32_to_u32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = f32_to_f16(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = f32_to_f16(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f16_to_f32(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f16_to_f32(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f64_to_f32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 
f64_to_f32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = f32_to_f64(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = f32_to_f64(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[7 : 0].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[7 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[15 : 8].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[15 : 8].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[23 : 16].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[23 : 16].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[31 : 24].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[31 : 24].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f64_to_u32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f64_to_u32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = u32_to_f64(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = u32_to_f64(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): + D0.f64 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = floor(S0.f64 + 0.5); + # if (isEven(floor(S0.f64)) && 
(fract(S0.f64) == 0.5)) then + # D0.f64 -= 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = floor(S0.f64 + 0.5) + if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): + D0.f64 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += -1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): + D0.f64 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + -floor(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + -floor(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): + D0.f32 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = floor(S0.f32 + 0.5F); + # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then + # D0.f32 -= 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = floor(S0.f32 + 0.5) + if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): + D0.f32 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += -1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): + D0.f32 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = pow(2.0F, S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = pow(2.0, S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = log2(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = log2(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32; + # // Can only raise integer DIV_BY_ZERO exception + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / S0.f64 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / S0.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sin(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = cos(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~S0.u32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32[31 : 0] = S0.u32[0 : 31] + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + 
D0.u32[31 : 0] = S0.u32[0 : 31] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from MSB + # if S0.u32[31 - i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(0, int(31)+1): + if S0.u32[31 - i] == 1: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from LSB + # if S0.u32[i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(0, int(31)+1): + if S0.u32[i] == 1: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if all bits are the same + # for i in 1 : 31 do + # // Search from MSB + # if S0.i32[31 - i] != S0.i32[31] then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(1, int(31)+1): + if S0.i32[31 - i] != S0.i32[31]: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f64) - 1023 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.i32 = 0 + else: + D0.i32 = exponent(S0.f64) - 1023 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.f64 = S0.f64 + # else + # D0.f64 = mantissa(S0.f64) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.f64 = S0.f64 + else: + D0.f64 = mantissa(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 + -floor(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 + -floor(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f32) - 127 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.i32 = 
0 + else: + D0.i32 = exponent(S0.f32) - 127 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.f32 = S0.f32 + # else + # D0.f32 = mantissa(S0.f32) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = mantissa(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b64 = S0.b64 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b64 = S0.b64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = u16_to_f16(S0.u16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = u16_to_f16(S0.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = i16_to_f16(S0.i16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = i16_to_f16(S0.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = f16_to_u16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = f16_to_u16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = f16_to_i16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = f16_to_i16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / S0.f16 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = log2(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = log2(S0.f16) + # --- end pseudocode --- + result = 
{'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = pow(16'2.0, S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = pow(2.0, S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then + # D0.f16 = S0.f16 + # else + # D0.f16 = mantissa(S0.f16) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = mantissa(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then + # D0.i16 = 16'0 + # else + # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): + D0.i16 = 0 + else: + D0.i16 = (exponent(S0.f16) - 15 + 1) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += -16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): + D0.f16 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): + D0.f16 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = floor(S0.f16 + 16'0.5); + # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then + # D0.f16 -= 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = floor(S0.f16 + 0.5) + if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): + D0.f16 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + -floor(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + -floor(S0.f16) + # --- end pseudocode --- + result = {'d0': 
D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sin(S0.f16 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = cos(S0.f16 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = f16_to_snorm(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = f16_to_snorm(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = f16_to_unorm(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = f16_to_unorm(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if n <= 16'0 then + # elsif n >= 16'255 then + # else + # endif); + # tmp = 16'0; + # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); + # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); + # D0.b16 = tmp.b16 + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) + tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) + D0.b16 = tmp.b16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.b32; + # D0.b32 = S0.b32; + # S0.b32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.b32) + D0.b32 = S0.b32 + S0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if SDWA_SRC0_SEL == BYTE1.b3 then + # D0.f32 = fp8_to_f32(S0[15 : 8].fp8) + # elsif SDWA_SRC0_SEL == BYTE2.b3 then + # D0.f32 = fp8_to_f32(S0[23 : 16].fp8) + # elsif SDWA_SRC0_SEL == BYTE3.b3 then + # D0.f32 = fp8_to_f32(S0[31 : 24].fp8) + # else + # // BYTE0 implied + # D0.f32 = fp8_to_f32(S0[7 : 0].fp8) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if SDWA_SRC0_SEL == BYTE1.b3: + D0.f32 = fp8_to_f32(S0[15 : 8].fp8) + elif SDWA_SRC0_SEL == BYTE2.b3: + D0.f32 = fp8_to_f32(S0[23 : 16].fp8) + elif SDWA_SRC0_SEL == BYTE3.b3: + D0.f32 = fp8_to_f32(S0[31 : 24].fp8) + else: + D0.f32 = fp8_to_f32(S0[7 : 0].fp8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if SDWA_SRC0_SEL == BYTE1.b3 then + # D0.f32 = bf8_to_f32(S0[15 : 8].bf8) + # elsif SDWA_SRC0_SEL == BYTE2.b3 then + # 
D0.f32 = bf8_to_f32(S0[23 : 16].bf8) + # elsif SDWA_SRC0_SEL == BYTE3.b3 then + # D0.f32 = bf8_to_f32(S0[31 : 24].bf8) + # else + # // BYTE0 implied + # D0.f32 = bf8_to_f32(S0[7 : 0].bf8) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if SDWA_SRC0_SEL == BYTE1.b3: + D0.f32 = bf8_to_f32(S0[15 : 8].bf8) + elif SDWA_SRC0_SEL == BYTE2.b3: + D0.f32 = bf8_to_f32(S0[23 : 16].bf8) + elif SDWA_SRC0_SEL == BYTE3.b3: + D0.f32 = bf8_to_f32(S0[31 : 24].bf8) + else: + D0.f32 = bf8_to_f32(S0[7 : 0].bf8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? S0[31 : 16] : S0[15 : 0]; + # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); + # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) + S0 = Reg(s0) + D0 = Reg(d0) + D1 = Reg(0) + tmp = Reg(0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0]))) + D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) + D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? S0[31 : 16] : S0[15 : 0]; + # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); + # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) + S0 = Reg(s0) + D0 = Reg(d0) + D1 = Reg(0) + tmp = Reg(0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0]))) + D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) + D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # for pass in 0 : 1 do + # for lane in 0 : 15 do + # tmp = VGPR[pass * 32 + lane][SRC0.u32]; + # endfor + # endfor + tmp = Reg(0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + for _pass in range(0, int(1)+1): + for lane in range(0, int(15)+1): + tmp = Reg(VGPR[_pass * 32 + lane][SRC0.u32]) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # for lane in 0 : 31 do + # tmp = VGPR[lane][SRC0.u32]; + # endfor + tmp = Reg(0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + for lane in range(0, int(31)+1): + tmp = Reg(VGPR[lane][SRC0.u32]) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 32'F({ S0.b16, 16'0U }) + # V_CMPX_{COMPF}_F16 16-bit float compare. Also writes EXEC. 0x30 to 0x3F + # V_CMPX_{COMPF}_F32 32-bit float compare. Also writes EXEC. 0x50 to 0x5F + # V_CMPSX_{COMPF}_F64 64-bit float compare. Also writes EXEC. 0x70 to 0x7F + # V_CMPX_{COMPI}_I16 16-bit unsigned integer compare. Also writes EXEC. 0xB0 - 0xB7 + # V_CMPX_{COMPI}_U16 16-bit unsigned integer compare. Also writes EXEC. 0xB8 - 0xBF + # V_CMPX_{COMPI}_I32 32-bit unsigned integer compare. Also writes EXEC. 
0xD0 - 0xD7 + # V_CMPX_{COMPI}_U32 32-bit unsigned integer compare. Also writes EXEC. 0xD8 - 0xDF + # V_CMPX_{COMPI}_I64 64-bit unsigned integer compare. Also writes EXEC. 0xF0 - 0xF7 + # V_CMPX_{COMPI}_U64 64-bit unsigned integer compare. Also writes EXEC. 0xF8 - 0xFF + S0 = Reg(s0) + D0 = Reg(d0) + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + D0.f32 = F(_pack(S0.b16, 0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +VOP1Op_FUNCTIONS = { + VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, + VOP1Op.V_READFIRSTLANE_B32: _VOP1Op_V_READFIRSTLANE_B32, + VOP1Op.V_CVT_I32_F64: _VOP1Op_V_CVT_I32_F64, + VOP1Op.V_CVT_F64_I32: _VOP1Op_V_CVT_F64_I32, + VOP1Op.V_CVT_F32_I32: _VOP1Op_V_CVT_F32_I32, + VOP1Op.V_CVT_F32_U32: _VOP1Op_V_CVT_F32_U32, + VOP1Op.V_CVT_U32_F32: _VOP1Op_V_CVT_U32_F32, + VOP1Op.V_CVT_I32_F32: _VOP1Op_V_CVT_I32_F32, + VOP1Op.V_CVT_F16_F32: _VOP1Op_V_CVT_F16_F32, + VOP1Op.V_CVT_F32_F16: _VOP1Op_V_CVT_F32_F16, + VOP1Op.V_CVT_RPI_I32_F32: _VOP1Op_V_CVT_RPI_I32_F32, + VOP1Op.V_CVT_FLR_I32_F32: _VOP1Op_V_CVT_FLR_I32_F32, + VOP1Op.V_CVT_F32_F64: _VOP1Op_V_CVT_F32_F64, + VOP1Op.V_CVT_F64_F32: _VOP1Op_V_CVT_F64_F32, + VOP1Op.V_CVT_F32_UBYTE0: _VOP1Op_V_CVT_F32_UBYTE0, + VOP1Op.V_CVT_F32_UBYTE1: _VOP1Op_V_CVT_F32_UBYTE1, + VOP1Op.V_CVT_F32_UBYTE2: _VOP1Op_V_CVT_F32_UBYTE2, + VOP1Op.V_CVT_F32_UBYTE3: _VOP1Op_V_CVT_F32_UBYTE3, + VOP1Op.V_CVT_U32_F64: _VOP1Op_V_CVT_U32_F64, + VOP1Op.V_CVT_F64_U32: _VOP1Op_V_CVT_F64_U32, + VOP1Op.V_TRUNC_F64: _VOP1Op_V_TRUNC_F64, + VOP1Op.V_CEIL_F64: _VOP1Op_V_CEIL_F64, + VOP1Op.V_RNDNE_F64: _VOP1Op_V_RNDNE_F64, + VOP1Op.V_FLOOR_F64: _VOP1Op_V_FLOOR_F64, + VOP1Op.V_FRACT_F32: _VOP1Op_V_FRACT_F32, + VOP1Op.V_TRUNC_F32: _VOP1Op_V_TRUNC_F32, + VOP1Op.V_CEIL_F32: _VOP1Op_V_CEIL_F32, + VOP1Op.V_RNDNE_F32: _VOP1Op_V_RNDNE_F32, + VOP1Op.V_FLOOR_F32: _VOP1Op_V_FLOOR_F32, + VOP1Op.V_EXP_F32: _VOP1Op_V_EXP_F32, + VOP1Op.V_LOG_F32: _VOP1Op_V_LOG_F32, + VOP1Op.V_RCP_F32: _VOP1Op_V_RCP_F32, + VOP1Op.V_RCP_IFLAG_F32: _VOP1Op_V_RCP_IFLAG_F32, + VOP1Op.V_RSQ_F32: _VOP1Op_V_RSQ_F32, + VOP1Op.V_RCP_F64: _VOP1Op_V_RCP_F64, + VOP1Op.V_RSQ_F64: _VOP1Op_V_RSQ_F64, + VOP1Op.V_SQRT_F32: _VOP1Op_V_SQRT_F32, + VOP1Op.V_SQRT_F64: _VOP1Op_V_SQRT_F64, + VOP1Op.V_SIN_F32: _VOP1Op_V_SIN_F32, + VOP1Op.V_COS_F32: _VOP1Op_V_COS_F32, + VOP1Op.V_NOT_B32: _VOP1Op_V_NOT_B32, + VOP1Op.V_BFREV_B32: _VOP1Op_V_BFREV_B32, + VOP1Op.V_FFBH_U32: _VOP1Op_V_FFBH_U32, + VOP1Op.V_FFBL_B32: _VOP1Op_V_FFBL_B32, + VOP1Op.V_FFBH_I32: _VOP1Op_V_FFBH_I32, + VOP1Op.V_FREXP_EXP_I32_F64: _VOP1Op_V_FREXP_EXP_I32_F64, + VOP1Op.V_FREXP_MANT_F64: _VOP1Op_V_FREXP_MANT_F64, + VOP1Op.V_FRACT_F64: _VOP1Op_V_FRACT_F64, + VOP1Op.V_FREXP_EXP_I32_F32: _VOP1Op_V_FREXP_EXP_I32_F32, + VOP1Op.V_FREXP_MANT_F32: _VOP1Op_V_FREXP_MANT_F32, + VOP1Op.V_MOV_B64: _VOP1Op_V_MOV_B64, + VOP1Op.V_CVT_F16_U16: _VOP1Op_V_CVT_F16_U16, + VOP1Op.V_CVT_F16_I16: _VOP1Op_V_CVT_F16_I16, + VOP1Op.V_CVT_U16_F16: _VOP1Op_V_CVT_U16_F16, + VOP1Op.V_CVT_I16_F16: _VOP1Op_V_CVT_I16_F16, + VOP1Op.V_RCP_F16: _VOP1Op_V_RCP_F16, + VOP1Op.V_SQRT_F16: _VOP1Op_V_SQRT_F16, + VOP1Op.V_RSQ_F16: _VOP1Op_V_RSQ_F16, + VOP1Op.V_LOG_F16: _VOP1Op_V_LOG_F16, + VOP1Op.V_EXP_F16: _VOP1Op_V_EXP_F16, + VOP1Op.V_FREXP_MANT_F16: _VOP1Op_V_FREXP_MANT_F16, + VOP1Op.V_FREXP_EXP_I16_F16: _VOP1Op_V_FREXP_EXP_I16_F16, + VOP1Op.V_FLOOR_F16: _VOP1Op_V_FLOOR_F16, + VOP1Op.V_CEIL_F16: _VOP1Op_V_CEIL_F16, + VOP1Op.V_TRUNC_F16: _VOP1Op_V_TRUNC_F16, + VOP1Op.V_RNDNE_F16: _VOP1Op_V_RNDNE_F16, + 
VOP1Op.V_FRACT_F16: _VOP1Op_V_FRACT_F16, + VOP1Op.V_SIN_F16: _VOP1Op_V_SIN_F16, + VOP1Op.V_COS_F16: _VOP1Op_V_COS_F16, + VOP1Op.V_CVT_NORM_I16_F16: _VOP1Op_V_CVT_NORM_I16_F16, + VOP1Op.V_CVT_NORM_U16_F16: _VOP1Op_V_CVT_NORM_U16_F16, + VOP1Op.V_SAT_PK_U8_I16: _VOP1Op_V_SAT_PK_U8_I16, + VOP1Op.V_SWAP_B32: _VOP1Op_V_SWAP_B32, + VOP1Op.V_CVT_F32_FP8: _VOP1Op_V_CVT_F32_FP8, + VOP1Op.V_CVT_F32_BF8: _VOP1Op_V_CVT_F32_BF8, + VOP1Op.V_CVT_PK_F32_FP8: _VOP1Op_V_CVT_PK_F32_FP8, + VOP1Op.V_CVT_PK_F32_BF8: _VOP1Op_V_CVT_PK_F32_BF8, + VOP1Op.V_PERMLANE16_SWAP_B32: _VOP1Op_V_PERMLANE16_SWAP_B32, + VOP1Op.V_PERMLANE32_SWAP_B32: _VOP1Op_V_PERMLANE32_SWAP_B32, + VOP1Op.V_CVT_F32_BF16: _VOP1Op_V_CVT_F32_BF16, +} + +def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 - S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 - S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S1.f32 - S0.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S1.f32 - S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = fma(S0.f64, S1.f64, D0.f64) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = fma(S0.f64, S1.f64, D0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 * S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i24) * (S1.i24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (((S0.i24) * (S1.i24)) 
>> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u24) * (S1.u24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u24) * (S1.u24)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then + # D0.f32 = S1.f32 + # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then + # D0.f32 = S0.f32 + # else + # D0.f32 = S0.f32 < S1.f32 ? S0.f32 : S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)): + D0.f32 = S1.f32 + elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)): + D0.f32 = S0.f32 + else: + D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then + # D0.f32 = S0.f32 + # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then + # D0.f32 = S1.f32 + # elsif WAVE_MODE.IEEE then + # D0.f32 = S0.f32 >= S1.f32 ? S0.f32 : S1.f32 + # else + # D0.f32 = S0.f32 > S1.f32 ? 
S0.f32 : S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)): + D0.f32 = S0.f32 + elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)): + D0.f32 = S1.f32 + elif WAVE_MODE.IEEE: + D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32)) + else: + D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = (S1.i32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S1.i32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 << S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 << S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32); + # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. 
+ # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32)) + VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32; + # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32) + VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32 - S0.u32; + # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S1.u32 - S0.u32) + VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; + # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. 
+ # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 - S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 - S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S1.f16 - S0.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S1.f16 - S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.f16 * S1.f16 + D0.f16; + # if OPSEL.u4[3] then + # D0 = { tmp.f16, D0[15 : 0] } + # else + # D0 = { 16'0, tmp.f16 } + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.f16 * S1.f16 + D0.f16) + if OPSEL.u4[3]: + D0 = Reg(_pack(tmp.f16, D0[15 : 0])) + else: + D0 = Reg(_pack(0, tmp.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.f16 * SIMM16.f16 + S1.f16; + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + SIMM16 = Reg(literal) + # --- compiled pseudocode --- + tmp = Reg(S0.f16 * SIMM16.f16 + S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.f16 * S1.f16 + SIMM16.f16; + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + SIMM16 = Reg(literal) + # --- compiled pseudocode --- + tmp = Reg(S0.f16 * S1.f16 + SIMM16.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 + S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 + S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): + # D0.u16 = S0.u16 - S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 - S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S1.u16 - S0.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S1.u16 - S0.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 * S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 * S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S1.u16 << S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S1.u16 << S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S1.u16 >> S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S1.u16 >> S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = (S1.i16 >> S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = (S1.i16 >> S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then + # D0.f16 = S0.f16 + # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then + # D0.f16 = S1.f16 + # elsif WAVE_MODE.IEEE then + # D0.f16 = S0.f16 >= S1.f16 ? S0.f16 : S1.f16 + # else + # D0.f16 = S0.f16 > S1.f16 ? 
S0.f16 : S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)): + D0.f16 = S0.f16 + elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)): + D0.f16 = S1.f16 + elif WAVE_MODE.IEEE: + D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16)) + else: + D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then + # D0.f16 = S1.f16 + # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then + # D0.f16 = S0.f16 + # else + # D0.f16 = S0.f16 < S1.f16 ? S0.f16 : S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)): + D0.f16 = S1.f16 + elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)): + D0.f16 = S0.f16 + else: + D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 < S1.i16 ? 
S0.i16 : S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 + S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 + S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 - S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 - S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S1.u32 - S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S1.u32 - S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.f32; + # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); + # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.f32) + tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) + tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); + # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) + tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); + # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); + # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); + # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) + tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) + tmp += 
i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) + tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); + # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); + # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); + # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); + # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); + # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); + # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); + # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) + tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) + tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4) + tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4) + tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4) + tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4) + tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) + tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, D0.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, D0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); + # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) + D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +VOP2Op_FUNCTIONS = { + VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, + VOP2Op.V_ADD_F32: _VOP2Op_V_ADD_F32, + VOP2Op.V_SUB_F32: _VOP2Op_V_SUB_F32, + VOP2Op.V_SUBREV_F32: _VOP2Op_V_SUBREV_F32, + VOP2Op.V_FMAC_F64: _VOP2Op_V_FMAC_F64, + VOP2Op.V_MUL_F32: _VOP2Op_V_MUL_F32, + VOP2Op.V_MUL_I32_I24: _VOP2Op_V_MUL_I32_I24, + VOP2Op.V_MUL_HI_I32_I24: _VOP2Op_V_MUL_HI_I32_I24, + VOP2Op.V_MUL_U32_U24: _VOP2Op_V_MUL_U32_U24, + VOP2Op.V_MUL_HI_U32_U24: _VOP2Op_V_MUL_HI_U32_U24, + VOP2Op.V_MIN_F32: _VOP2Op_V_MIN_F32, + VOP2Op.V_MAX_F32: _VOP2Op_V_MAX_F32, + VOP2Op.V_MIN_I32: _VOP2Op_V_MIN_I32, + VOP2Op.V_MAX_I32: _VOP2Op_V_MAX_I32, + VOP2Op.V_MIN_U32: _VOP2Op_V_MIN_U32, + 
VOP2Op.V_MAX_U32: _VOP2Op_V_MAX_U32, + VOP2Op.V_LSHRREV_B32: _VOP2Op_V_LSHRREV_B32, + VOP2Op.V_ASHRREV_I32: _VOP2Op_V_ASHRREV_I32, + VOP2Op.V_LSHLREV_B32: _VOP2Op_V_LSHLREV_B32, + VOP2Op.V_AND_B32: _VOP2Op_V_AND_B32, + VOP2Op.V_OR_B32: _VOP2Op_V_OR_B32, + VOP2Op.V_XOR_B32: _VOP2Op_V_XOR_B32, + VOP2Op.V_FMAMK_F32: _VOP2Op_V_FMAMK_F32, + VOP2Op.V_FMAAK_F32: _VOP2Op_V_FMAAK_F32, + VOP2Op.V_ADD_CO_U32: _VOP2Op_V_ADD_CO_U32, + VOP2Op.V_SUB_CO_U32: _VOP2Op_V_SUB_CO_U32, + VOP2Op.V_SUBREV_CO_U32: _VOP2Op_V_SUBREV_CO_U32, + VOP2Op.V_ADDC_CO_U32: _VOP2Op_V_ADDC_CO_U32, + VOP2Op.V_SUBB_CO_U32: _VOP2Op_V_SUBB_CO_U32, + VOP2Op.V_SUBBREV_CO_U32: _VOP2Op_V_SUBBREV_CO_U32, + VOP2Op.V_ADD_F16: _VOP2Op_V_ADD_F16, + VOP2Op.V_SUB_F16: _VOP2Op_V_SUB_F16, + VOP2Op.V_SUBREV_F16: _VOP2Op_V_SUBREV_F16, + VOP2Op.V_MUL_F16: _VOP2Op_V_MUL_F16, + VOP2Op.V_MAC_F16: _VOP2Op_V_MAC_F16, + VOP2Op.V_MADMK_F16: _VOP2Op_V_MADMK_F16, + VOP2Op.V_MADAK_F16: _VOP2Op_V_MADAK_F16, + VOP2Op.V_ADD_U16: _VOP2Op_V_ADD_U16, + VOP2Op.V_SUB_U16: _VOP2Op_V_SUB_U16, + VOP2Op.V_SUBREV_U16: _VOP2Op_V_SUBREV_U16, + VOP2Op.V_MUL_LO_U16: _VOP2Op_V_MUL_LO_U16, + VOP2Op.V_LSHLREV_B16: _VOP2Op_V_LSHLREV_B16, + VOP2Op.V_LSHRREV_B16: _VOP2Op_V_LSHRREV_B16, + VOP2Op.V_ASHRREV_I16: _VOP2Op_V_ASHRREV_I16, + VOP2Op.V_MAX_F16: _VOP2Op_V_MAX_F16, + VOP2Op.V_MIN_F16: _VOP2Op_V_MIN_F16, + VOP2Op.V_MAX_U16: _VOP2Op_V_MAX_U16, + VOP2Op.V_MAX_I16: _VOP2Op_V_MAX_I16, + VOP2Op.V_MIN_U16: _VOP2Op_V_MIN_U16, + VOP2Op.V_MIN_I16: _VOP2Op_V_MIN_I16, + VOP2Op.V_LDEXP_F16: _VOP2Op_V_LDEXP_F16, + VOP2Op.V_ADD_U32: _VOP2Op_V_ADD_U32, + VOP2Op.V_SUB_U32: _VOP2Op_V_SUB_U32, + VOP2Op.V_SUBREV_U32: _VOP2Op_V_SUBREV_U32, + VOP2Op.V_DOT2C_F32_F16: _VOP2Op_V_DOT2C_F32_F16, + VOP2Op.V_DOT2C_I32_I16: _VOP2Op_V_DOT2C_I32_I16, + VOP2Op.V_DOT4C_I32_I8: _VOP2Op_V_DOT4C_I32_I8, + VOP2Op.V_DOT8C_I32_I4: _VOP2Op_V_DOT8C_I32_I4, + VOP2Op.V_FMAC_F32: _VOP2Op_V_FMAC_F32, + VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, + VOP2Op.V_XNOR_B32: _VOP2Op_V_XNOR_B32, +} + +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 + tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; + # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- 
compiled pseudocode --- + tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 + tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 + tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); + # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) + tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); + # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) + tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); + # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) + tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) + tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? 
S0[15 : 0].i16 : S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) + tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 + tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 + tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 + tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) + tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) + tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); + # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) + tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; + # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 + tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; + # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 + tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); + # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16) + tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); + # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16) + tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].f16 = 16'F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); + # tmp[15 : 0].f16 = 16'F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].f16 = F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)) + tmp[15 : 0].f16 = F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].f16 = 16'F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); + # tmp[15 : 0].f16 = 16'F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].f16 = F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)) + tmp[15 : 0].f16 = F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.f32; + # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); + # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.f32) + tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) + tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.i32; + # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); + # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.i32) + tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) + tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.u32; + # tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16); + # tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16) + tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.i32; + # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); + # tmp += 
i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); + # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); + # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.i32) + tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) + tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) + tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) + tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.u32; + # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); + # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); + # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8); + # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8) + tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8) + tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8) + tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.i32; + # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); + # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); + # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); + # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); + # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); + # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); + # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); + # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.i32) + tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) + tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) + tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4) + tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4) + tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4) + tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4) + tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) + tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.u32; + # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); + # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); + # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4); + # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4); + # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4); + # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4); + # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4); + # 
tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4) + tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4) + tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4) + tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4) + tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4) + tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4) + tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4) + tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 64'B; + # tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32); + # tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32); + # D0.b64 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32) + tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32) + D0.b64 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 64'B; + # tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32; + # tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32; + # D0.b64 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32 + tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32 + D0.b64 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 64'B; + # tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32; + # tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32; + # D0.b64 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32 + tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32 + D0.b64 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3POp_V_PK_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32]; + # tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32]; + # D0.u32[31 : 0] = tmp0.u32; + # D0.u32[63 : 32] = tmp1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32] + tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32] + D0.u32[31 : 0] = tmp0.u32 + D0.u32[63 : 32] = tmp1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +VOP3POp_FUNCTIONS = { + VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, + VOP3POp.V_PK_MUL_LO_U16: _VOP3POp_V_PK_MUL_LO_U16, + VOP3POp.V_PK_ADD_I16: _VOP3POp_V_PK_ADD_I16, + 
VOP3POp.V_PK_SUB_I16: _VOP3POp_V_PK_SUB_I16, + VOP3POp.V_PK_LSHLREV_B16: _VOP3POp_V_PK_LSHLREV_B16, + VOP3POp.V_PK_LSHRREV_B16: _VOP3POp_V_PK_LSHRREV_B16, + VOP3POp.V_PK_ASHRREV_I16: _VOP3POp_V_PK_ASHRREV_I16, + VOP3POp.V_PK_MAX_I16: _VOP3POp_V_PK_MAX_I16, + VOP3POp.V_PK_MIN_I16: _VOP3POp_V_PK_MIN_I16, + VOP3POp.V_PK_MAD_U16: _VOP3POp_V_PK_MAD_U16, + VOP3POp.V_PK_ADD_U16: _VOP3POp_V_PK_ADD_U16, + VOP3POp.V_PK_SUB_U16: _VOP3POp_V_PK_SUB_U16, + VOP3POp.V_PK_MAX_U16: _VOP3POp_V_PK_MAX_U16, + VOP3POp.V_PK_MIN_U16: _VOP3POp_V_PK_MIN_U16, + VOP3POp.V_PK_FMA_F16: _VOP3POp_V_PK_FMA_F16, + VOP3POp.V_PK_ADD_F16: _VOP3POp_V_PK_ADD_F16, + VOP3POp.V_PK_MUL_F16: _VOP3POp_V_PK_MUL_F16, + VOP3POp.V_PK_MIN_F16: _VOP3POp_V_PK_MIN_F16, + VOP3POp.V_PK_MAX_F16: _VOP3POp_V_PK_MAX_F16, + VOP3POp.V_PK_MINIMUM3_F16: _VOP3POp_V_PK_MINIMUM3_F16, + VOP3POp.V_PK_MAXIMUM3_F16: _VOP3POp_V_PK_MAXIMUM3_F16, + VOP3POp.V_DOT2_F32_F16: _VOP3POp_V_DOT2_F32_F16, + VOP3POp.V_DOT2_I32_I16: _VOP3POp_V_DOT2_I32_I16, + VOP3POp.V_DOT2_U32_U16: _VOP3POp_V_DOT2_U32_U16, + VOP3POp.V_DOT4_I32_I8: _VOP3POp_V_DOT4_I32_I8, + VOP3POp.V_DOT4_U32_U8: _VOP3POp_V_DOT4_U32_U8, + VOP3POp.V_DOT8_I32_I4: _VOP3POp_V_DOT8_I32_I4, + VOP3POp.V_DOT8_U32_U4: _VOP3POp_V_DOT8_U32_U4, + VOP3POp.V_PK_FMA_F32: _VOP3POp_V_PK_FMA_F32, + VOP3POp.V_PK_MUL_F32: _VOP3POp_V_PK_MUL_F32, + VOP3POp.V_PK_ADD_F32: _VOP3POp_V_PK_ADD_F32, + VOP3POp.V_PK_MOV_B32: _VOP3POp_V_PK_MOV_B32, +} + +def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f32)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f32)) then + # result = S1.u32[1] + # elsif exponent(S0.f32) == 255 then + # // +-INF + # result = S1.u32[sign(S0.f32) ? 2 : 9] + # elsif exponent(S0.f32) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f32) ? 3 : 8] + # elsif 64'F(abs(S0.f32)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f32) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f32) ? 5 : 6] + # endif; + # D0.u64[laneId] = result; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f32)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f32)): + result = S1.u32[1] + elif exponent(S0.f32) == 255: + result = S1.u32[((2) if (sign(S0.f32)) else (9))] + elif exponent(S0.f32) > 0: + result = S1.u32[((3) if (sign(S0.f32)) else (8))] + elif F(abs(S0.f32)) > 0.0: + result = S1.u32[((4) if (sign(S0.f32)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f32)) else (6))] + D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f32)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f32)) then + # result = S1.u32[1] + # elsif exponent(S0.f32) == 255 then + # // +-INF + # result = S1.u32[sign(S0.f32) ? 2 : 9] + # elsif exponent(S0.f32) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f32) ? 3 : 8] + # elsif 64'F(abs(S0.f32)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f32) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f32) ? 5 : 6] + # endif; + # EXEC.u64[laneId] = D0.u64[laneId] = result + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f32)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f32)): + result = S1.u32[1] + elif exponent(S0.f32) == 255: + result = S1.u32[((2) if (sign(S0.f32)) else (9))] + elif exponent(S0.f32) > 0: + result = S1.u32[((3) if (sign(S0.f32)) else (8))] + elif F(abs(S0.f32)) > 0.0: + result = S1.u32[((4) if (sign(S0.f32)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f32)) else (6))] + EXEC.u64[laneId] = D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. 
+ # declare result : 1'U; + # if isSignalNAN(S0.f64) then + # result = S1.u32[0] + # elsif isQuietNAN(S0.f64) then + # result = S1.u32[1] + # elsif exponent(S0.f64) == 2047 then + # // +-INF + # result = S1.u32[sign(S0.f64) ? 2 : 9] + # elsif exponent(S0.f64) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f64) ? 3 : 8] + # elsif abs(S0.f64) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f64) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f64) ? 5 : 6] + # endif; + # D0.u64[laneId] = result; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(S0.f64): + result = S1.u32[0] + elif isQuietNAN(S0.f64): + result = S1.u32[1] + elif exponent(S0.f64) == 2047: + result = S1.u32[((2) if (sign(S0.f64)) else (9))] + elif exponent(S0.f64) > 0: + result = S1.u32[((3) if (sign(S0.f64)) else (8))] + elif abs(S0.f64) > 0.0: + result = S1.u32[((4) if (sign(S0.f64)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f64)) else (6))] + D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(S0.f64) then + # result = S1.u32[0] + # elsif isQuietNAN(S0.f64) then + # result = S1.u32[1] + # elsif exponent(S0.f64) == 2047 then + # // +-INF + # result = S1.u32[sign(S0.f64) ? 2 : 9] + # elsif exponent(S0.f64) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f64) ? 3 : 8] + # elsif abs(S0.f64) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f64) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f64) ? 5 : 6] + # endif; + # EXEC.u64[laneId] = D0.u64[laneId] = result + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(S0.f64): + result = S1.u32[0] + elif isQuietNAN(S0.f64): + result = S1.u32[1] + elif exponent(S0.f64) == 2047: + result = S1.u32[((2) if (sign(S0.f64)) else (9))] + elif exponent(S0.f64) > 0: + result = S1.u32[((3) if (sign(S0.f64)) else (8))] + elif abs(S0.f64) > 0.0: + result = S1.u32[((4) if (sign(S0.f64)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f64)) else (6))] + EXEC.u64[laneId] = D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. 
+ # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f16)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f16)) then + # result = S1.u32[1] + # elsif exponent(S0.f16) == 31 then + # // +-INF + # result = S1.u32[sign(S0.f16) ? 2 : 9] + # elsif exponent(S0.f16) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f16) ? 3 : 8] + # elsif 64'F(abs(S0.f16)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f16) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f16) ? 5 : 6] + # endif; + # D0.u64[laneId] = result; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f16)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f16)): + result = S1.u32[1] + elif exponent(S0.f16) == 31: + result = S1.u32[((2) if (sign(S0.f16)) else (9))] + elif exponent(S0.f16) > 0: + result = S1.u32[((3) if (sign(S0.f16)) else (8))] + elif F(abs(S0.f16)) > 0.0: + result = S1.u32[((4) if (sign(S0.f16)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f16)) else (6))] + D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f16)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f16)) then + # result = S1.u32[1] + # elsif exponent(S0.f16) == 31 then + # // +-INF + # result = S1.u32[sign(S0.f16) ? 2 : 9] + # elsif exponent(S0.f16) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f16) ? 3 : 8] + # elsif 64'F(abs(S0.f16)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f16) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f16) ? 
5 : 6] + # endif; + # EXEC.u64[laneId] = D0.u64[laneId] = result + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f16)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f16)): + result = S1.u32[1] + elif exponent(S0.f16) == 31: + result = S1.u32[((2) if (sign(S0.f16)) else (9))] + elif exponent(S0.f16) > 0: + result = S1.u32[((3) if (sign(S0.f16)) else (8))] + elif F(abs(S0.f16)) > 0.0: + result = S1.u32[((4) if (sign(S0.f16)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f16)) else (6))] + EXEC.u64[laneId] = D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f16 < S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 < S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f16 == S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 == S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f16 <= S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 <= S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.f16 > S1.f16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 > S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f16 <> S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 != S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f16 >= S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 >= S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC + # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f16 >= S1.f16); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 >= S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f16 <> S1.f16); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 != S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = !(S0.f16 > S1.f16); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 > S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f16 <= S1.f16); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 <= S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f16 == S1.f16); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 == S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f16 < S1.f16); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 < S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
+ D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16; + # // D0 = VCC in VOPC encoding. 
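+  # CMPX variant: in addition to D0, the compare bit is written into EXEC for this lane, so the
+  # result dict gains an 'exec' entry whenever the EXEC mask changes.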
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. 
+ # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f32 < S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 < S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f32 == S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 == S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f32 <= S1.f32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 <= S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.f32 > S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 > S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f32 <> S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 != S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f32 >= S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 >= S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC + # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f32 >= S1.f32); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. 
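+  # NGE is not(S0 >= S1): unlike V_CMP_LT_F32 it evaluates to 1 when either input is NaN.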
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 >= S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f32 <> S1.f32); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 != S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = !(S0.f32 > S1.f32); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 > S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f32 <= S1.f32); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 <= S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f32 == S1.f32); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 == S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f32 < S1.f32); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 < S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. 
+ # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f64 < S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 < S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f64 == S1.f64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 == S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f64 <= S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 <= S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.f64 > S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 > S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f64 <> S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 != S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f64 >= S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 >= S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC + # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f64 >= S1.f64); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 >= S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f64 <> S1.f64); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 != S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = !(S0.f64 > S1.f64); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 > S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f64 <= S1.f64); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 <= S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f64 == S1.f64); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 == S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f64 < S1.f64); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 < S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i16 < S1.i16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 < S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i16 == S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 == S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i16 <= S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 <= S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.i16 > S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 > S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.i16 <> S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 != S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i16 >= S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 >= S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
+ D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u16 < S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 < S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u16 == S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 == S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u16 <= S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 <= S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.u16 > S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 > S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC + # D0.u64[laneId] = S0.u16 <> S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 != S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u16 >= S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 >= S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
+ D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i32 < S1.i32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 < S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i32 == S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 == S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i32 <= S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 <= S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.i32 > S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 > S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.i32 <> S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 != S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i32 >= S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 >= S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
+ D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u32 < S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 < S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u32 == S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 == S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u32 <= S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 <= S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.u32 > S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 > S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC + # D0.u64[laneId] = S0.u32 <> S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 != S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u32 >= S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 >= S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
+ D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i64 < S1.i64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 < S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i64 == S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 == S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i64 <= S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 <= S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.i64 > S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 > S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.i64 <> S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 != S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i64 >= S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 >= S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
+ D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u64 < S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 < S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u64 == S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 == S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u64 <= S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 <= S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.u64 > S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 > S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC + # D0.u64[laneId] = S0.u64 <> S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 != S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u64 >= S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 >= S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. + # D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
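+  # NOTE: the V_CMPX_* variants differ from V_CMP_* only in also writing the per-lane result into EXEC, which is why their result dict can additionally carry an 'exec' entry.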
+ D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; + # // D0 = VCC in VOPC encoding. + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. + # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; + # // D0 = VCC in VOPC encoding. 
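+  # NOTE: always-true unsigned compare; each lane's bit in EXEC and in the destination mask is set to 1, mirroring V_CMPX_T_I64 above.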
+ D0 = Reg(d0) + VCC = Reg(vcc) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = D0.u64[laneId] = 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b32 = S0.b32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b32 = S0.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare lane : 32'I; + # if EXEC == 0x0LL then + # lane = 0; + # // Force lane 0 if all lanes are disabled + # else + # lane = s_ff1_i32_b64(EXEC); + # // Lowest active lane + # endif; + # D0.b32 = VGPR[lane][SRC0.u32] + D0 = Reg(d0) + EXEC = Reg(exec_mask) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if EXEC == 0x0: + lane = 0 + else: + lane = s_ff1_i32_b64(EXEC) + D0.b32 = VGPR[lane][SRC0.u32] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f64_to_i32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f64_to_i32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = i32_to_f64(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = i32_to_f64(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = i32_to_f32(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = i32_to_f32(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # 
D0.u32 = f32_to_u32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f32_to_u32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = f32_to_f16(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = f32_to_f16(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f16_to_f32(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f16_to_f32(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f64_to_f32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f64_to_f32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = f32_to_f64(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = f32_to_f64(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[7 : 0].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[7 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[15 : 8].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[15 : 8].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[23 : 16].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[23 : 16].u32) + # 
--- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[31 : 24].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[31 : 24].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f64_to_u32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f64_to_u32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = u32_to_f64(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = u32_to_f64(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): + D0.f64 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = floor(S0.f64 + 0.5); + # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then + # D0.f64 -= 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = floor(S0.f64 + 0.5) + if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): + D0.f64 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += -1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): + D0.f64 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + -floor(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + -floor(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + # --- end pseudocode --- + 
result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): + D0.f32 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = floor(S0.f32 + 0.5F); + # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then + # D0.f32 -= 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = floor(S0.f32 + 0.5) + if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): + D0.f32 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += -1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): + D0.f32 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = pow(2.0F, S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = pow(2.0, S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = log2(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = log2(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32; + # // Can only raise integer DIV_BY_ZERO exception + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / S0.f64 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / S0.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sin(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = cos(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~S0.u32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32[31 : 0] = S0.u32[0 : 31] + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32[31 : 0] = S0.u32[0 : 31] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from MSB + # if S0.u32[31 - i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(0, int(31)+1): + if S0.u32[31 - i] == 1: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from LSB + # if S0.u32[i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(0, int(31)+1): + if S0.u32[i] == 1: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if all bits are the same + # for i in 1 : 31 do + # // Search from MSB + # if S0.i32[31 - i] != S0.i32[31] then + # D0.i32 
= i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(1, int(31)+1): + if S0.i32[31 - i] != S0.i32[31]: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f64) - 1023 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.i32 = 0 + else: + D0.i32 = exponent(S0.f64) - 1023 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.f64 = S0.f64 + # else + # D0.f64 = mantissa(S0.f64) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.f64 = S0.f64 + else: + D0.f64 = mantissa(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 + -floor(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 + -floor(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f32) - 127 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.i32 = 0 + else: + D0.i32 = exponent(S0.f32) - 127 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.f32 = S0.f32 + # else + # D0.f32 = mantissa(S0.f32) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = mantissa(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b64 = S0.b64 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b64 = S0.b64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = u16_to_f16(S0.u16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = u16_to_f16(S0.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = i16_to_f16(S0.i16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = i16_to_f16(S0.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = f16_to_u16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = f16_to_u16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = f16_to_i16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = f16_to_i16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / S0.f16 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = log2(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = log2(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = pow(16'2.0, S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = pow(2.0, S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 - S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 - S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S1.f32 - S0.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S1.f32 - S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = fma(S0.f64, S1.f64, D0.f64) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = fma(S0.f64, S1.f64, D0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 * S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i24) * (S1.i24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (((S0.i24) * (S1.i24)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u24) * (S1.u24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u24) * (S1.u24)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then + # D0.f32 = S1.f32 + # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then + # D0.f32 = S0.f32 + # else + # D0.f32 = S0.f32 < S1.f32 ? S0.f32 : S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)): + D0.f32 = S1.f32 + elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)): + D0.f32 = S0.f32 + else: + D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then + # D0.f32 = S0.f32 + # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then + # D0.f32 = S1.f32 + # elsif WAVE_MODE.IEEE then + # D0.f32 = S0.f32 >= S1.f32 ? S0.f32 : S1.f32 + # else + # D0.f32 = S0.f32 > S1.f32 ? S0.f32 : S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((F(S0.f32) == +0.0) and (F(S1.f32) == -0.0)): + D0.f32 = S0.f32 + elif ((F(S0.f32) == -0.0) and (F(S1.f32) == +0.0)): + D0.f32 = S1.f32 + elif WAVE_MODE.IEEE: + D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32)) + else: + D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 >= S1.i32 ? 
S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = (S1.i32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S1.i32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 << S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 << S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # 
D0.f16 = S0.f16 - S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 - S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S1.f16 - S0.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S1.f16 - S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.f16 * S1.f16 + D0.f16; + # if OPSEL.u4[3] then + # D0 = { tmp.f16, D0[15 : 0] } + # else + # D0 = { 16'0, tmp.f16 } + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.f16 * S1.f16 + D0.f16) + if OPSEL.u4[3]: + D0 = Reg(_pack(tmp.f16, D0[15 : 0])) + else: + D0 = Reg(_pack(0, tmp.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 + S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 + S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 - S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 - S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S1.u16 - S0.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S1.u16 - S0.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 * S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 * S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S1.u16 << S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S1.u16 << S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S1.u16 >> S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S1.u16 >> S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ASHRREV_I16(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = (S1.i16 >> S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = (S1.i16 >> S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then + # D0.f16 = S0.f16 + # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then + # D0.f16 = S1.f16 + # elsif WAVE_MODE.IEEE then + # D0.f16 = S0.f16 >= S1.f16 ? S0.f16 : S1.f16 + # else + # D0.f16 = S0.f16 > S1.f16 ? S0.f16 : S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)): + D0.f16 = S0.f16 + elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)): + D0.f16 = S1.f16 + elif WAVE_MODE.IEEE: + D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16)) + else: + D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then + # D0.f16 = S1.f16 + # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then + # D0.f16 = S0.f16 + # else + # D0.f16 = S0.f16 < S1.f16 ? S0.f16 : S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((F(S0.f16) == +0.0) and (F(S1.f16) == -0.0)): + D0.f16 = S1.f16 + elif ((F(S0.f16) == -0.0) and (F(S1.f16) == +0.0)): + D0.f16 = S0.f16 + else: + D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 >= S1.u16 ? 
S0.u16 : S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 + S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 + S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 - S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 - S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S1.u32 - S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S1.u32 - S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.f32; + # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); + # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.f32) + tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) + tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # tmp 
+= i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); + # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) + tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); + # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); + # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); + # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) + tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) + tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) + tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); + # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); + # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); + # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); + # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); + # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); + # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); + # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); + # D0.i32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) + tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) + tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4) + tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4) + tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4) + tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4) + tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) + tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) + D0.i32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, D0.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, D0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); + # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) + D0[31 : 16].f16 = fma(S0[31 
: 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i24) * (S1.i24) + S2.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u24) * (S1.u24) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). + # // XYZ coordinate is given in (S0.f, S1.f, S2.f). + # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # if S2.f32 < 0.0F then + # D0.f32 = 5.0F + # else + # D0.f32 = 4.0F + # endif + # elsif abs(S1.f32) >= abs(S0.f32) then + # if S1.f32 < 0.0F then + # D0.f32 = 3.0F + # else + # D0.f32 = 2.0F + # endif + # else + # if S0.f32 < 0.0F then + # D0.f32 = 1.0F + # else + # D0.f32 = 0.0F + # endif + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + if S2.f32 < 0.0: + D0.f32 = 5.0 + else: + D0.f32 = 4.0 + elif abs(S1.f32) >= abs(S0.f32): + if S1.f32 < 0.0: + D0.f32 = 3.0 + else: + D0.f32 = 2.0 + else: + if S0.f32 < 0.0: + D0.f32 = 1.0 + else: + D0.f32 = 0.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // D0.f = cubemap S coordinate. + # // XYZ coordinate is given in (S0.f, S1.f, S2.f). + # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # if S2.f32 < 0.0F then + # D0.f32 = -S0.f32 + # else + # D0.f32 = S0.f32 + # endif + # elsif abs(S1.f32) >= abs(S0.f32) then + # D0.f32 = S0.f32 + # else + # if S0.f32 < 0.0F then + # D0.f32 = S2.f32 + # else + # D0.f32 = -S2.f32 + # endif + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + if S2.f32 < 0.0: + D0.f32 = -S0.f32 + else: + D0.f32 = S0.f32 + elif abs(S1.f32) >= abs(S0.f32): + D0.f32 = S0.f32 + else: + if S0.f32 < 0.0: + D0.f32 = S2.f32 + else: + D0.f32 = -S2.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // D0.f = cubemap T coordinate. 
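# Illustrative sketch, not generated code and not part of this patch: a standalone
# Python reading of the cubemap face selection implemented by _VOP3AOp_V_CUBEID_F32
# above. The major axis is chosen as z, then y, then x, and a negative major axis
# selects the odd-numbered face. The function name is a hypothetical convenience,
# not an emulator helper.
def cube_face_id(x: float, y: float, z: float) -> float:
    if abs(z) >= abs(x) and abs(z) >= abs(y):
        return 5.0 if z < 0.0 else 4.0
    if abs(y) >= abs(x):
        return 3.0 if y < 0.0 else 2.0
    return 1.0 if x < 0.0 else 0.0

assert cube_face_id(0.2, -0.1, 1.0) == 4.0   # +z is the major axis -> face 4
assert cube_face_id(-3.0, 0.5, 0.5) == 1.0   # -x is the major axis -> face 1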
+ # // XYZ coordinate is given in (S0.f, S1.f, S2.f). + # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # D0.f32 = -S1.f32 + # elsif abs(S1.f32) >= abs(S0.f32) then + # if S1.f32 < 0.0F then + # D0.f32 = -S2.f32 + # else + # D0.f32 = S2.f32 + # endif + # else + # D0.f32 = -S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + D0.f32 = -S1.f32 + elif abs(S1.f32) >= abs(S0.f32): + if S1.f32 < 0.0: + D0.f32 = -S2.f32 + else: + D0.f32 = S2.f32 + else: + D0.f32 = -S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // D0.f = 2.0 * cubemap major axis. + # // XYZ coordinate is given in (S0.f, S1.f, S2.f). + # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # D0.f32 = S2.f32 * 2.0F + # elsif abs(S1.f32) >= abs(S0.f32) then + # D0.f32 = S1.f32 * 2.0F + # else + # D0.f32 = S0.f32 * 2.0F + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + D0.f32 = S2.f32 * 2.0 + elif abs(S1.f32) >= abs(S0.f32): + D0.f32 = S1.f32 * 2.0 + else: + D0.f32 = S0.f32 * 2.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); + # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) + D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = fma(S0.f64, S1.f64, S2.f64) + S0 = Reg(s0) + S1 = Reg(s1) + 
S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = fma(S0.f64, S1.f64, S2.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); + # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); + # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); + # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) + tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) + tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) + tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0]) & 0xffffffff) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0] * 8)) & 0xffffffff) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 
v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then + # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) + # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then + # D0.f32 = v_max_f32(S1.f32, S2.f32) + # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then + # D0.f32 = v_max_f32(S0.f32, S2.f32) + # else + # D0.f32 = v_max_f32(S0.f32, S1.f32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))): + D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) + elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32: + D0.f32 = v_max_f32(S1.f32, S2.f32) + elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S1.f32: + D0.f32 = v_max_f32(S0.f32, S2.f32) + else: + D0.f32 = v_max_f32(S0.f32, S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then + # D0.i32 = v_max_i32(S1.i32, S2.i32) + # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then + # D0.i32 = v_max_i32(S0.i32, S2.i32) + # else + # D0.i32 = v_max_i32(S0.i32, S1.i32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: + D0.i32 = v_max_i32(S1.i32, S2.i32) + elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: + D0.i32 = v_max_i32(S0.i32, S2.i32) + else: + D0.i32 = v_max_i32(S0.i32, S1.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then + # D0.u32 = v_max_u32(S1.u32, S2.u32) + # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then + # D0.u32 = v_max_u32(S0.u32, S2.u32) + # else + # D0.u32 = v_max_u32(S0.u32, S1.u32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: + D0.u32 = v_max_u32(S1.u32, S2.u32) + elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: + D0.u32 = v_max_u32(S0.u32, S2.u32) + else: + D0.u32 = v_max_u32(S0.u32, S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # tmp = S2.u32; + # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); + # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); + # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); + # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) + tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) + tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) + tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # tmp = S2.u32; + # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); + # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) + tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); + # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) + tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # sign_out = (sign(S1.f32) ^ sign(S2.f32)); + # if isNAN(64'F(S2.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then + # // 0/0 + # D0.f32 = 32'F(0xffc00000) + # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then + # // inf/inf + # D0.f32 = 32'F(0xffc00000) + # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then + # // x/0, or inf/y + # D0.f32 = sign_out ? -INF.f32 : +INF.f32 + # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then + # // x/inf, 0/y + # D0.f32 = sign_out ? -0.0F : 0.0F + # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then + # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32 + # elsif exponent(S1.f32) == 255 then + # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32 + # else + # D0.f32 = sign_out ? 
-abs(S0.f32) : abs(S0.f32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + sign_out = (sign(S1.f32) ^ sign(S2.f32)) + if isNAN(F(S2.f32)): + D0.f32 = F(cvtToQuietNAN(F(S2.f32))) + elif isNAN(F(S1.f32)): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif ((F(S1.f32) == 0.0) and (F(S2.f32) == 0.0)): + D0.f32 = F(0xffc00000) + elif ((F(abs(S1.f32)) == INF) and (F(abs(S2.f32)) == INF)): + D0.f32 = F(0xffc00000) + elif ((F(S1.f32) == 0.0) or (F(abs(S2.f32)) == INF)): + D0.f32 = (((-INF).f32) if (sign_out) else (INF.f32)) + elif ((F(abs(S1.f32)) == INF) or (F(S2.f32) == 0.0)): + D0.f32 = ((-0.0) if (sign_out) else (0.0)) + elif exponent(S2.f32) - exponent(S1.f32) < -150: + D0.f32 = ((-UNDERFLOW_F32) if (sign_out) else (UNDERFLOW_F32)) + elif exponent(S1.f32) == 255: + D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) + else: + D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # sign_out = (sign(S1.f64) ^ sign(S2.f64)); + # if isNAN(S2.f64) then + # D0.f64 = cvtToQuietNAN(S2.f64) + # elsif isNAN(S1.f64) then + # D0.f64 = cvtToQuietNAN(S1.f64) + # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then + # // 0/0 + # D0.f64 = 64'F(0xfff8000000000000LL) + # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then + # // inf/inf + # D0.f64 = 64'F(0xfff8000000000000LL) + # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then + # // x/0, or inf/y + # D0.f64 = sign_out ? -INF : +INF + # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then + # // x/inf, 0/y + # D0.f64 = sign_out ? -0.0 : 0.0 + # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then + # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64 + # elsif exponent(S1.f64) == 2047 then + # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64 + # else + # D0.f64 = sign_out ? 
-abs(S0.f64) : abs(S0.f64) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + sign_out = (sign(S1.f64) ^ sign(S2.f64)) + if isNAN(S2.f64): + D0.f64 = cvtToQuietNAN(S2.f64) + elif isNAN(S1.f64): + D0.f64 = cvtToQuietNAN(S1.f64) + elif ((S1.f64 == 0.0) and (S2.f64 == 0.0)): + D0.f64 = F(0xfff8000000000000) + elif ((abs(S1.f64) == INF) and (abs(S2.f64) == INF)): + D0.f64 = F(0xfff8000000000000) + elif ((S1.f64 == 0.0) or (abs(S2.f64) == INF)): + D0.f64 = (((-INF)) if (sign_out) else (INF)) + elif ((abs(S1.f64) == INF) or (S2.f64 == 0.0)): + D0.f64 = ((-0.0) if (sign_out) else (0.0)) + elif exponent(S2.f64) - exponent(S1.f64) < -1075: + D0.f64 = ((-UNDERFLOW_F64) if (sign_out) else (UNDERFLOW_F64)) + elif exponent(S1.f64) == 2047: + D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) + else: + D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if VCC.u64[laneId] then + # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) + # else + # D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if VCC.u64[laneId]: + D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) + else: + D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if VCC.u64[laneId] then + # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) + # else + # D0.f64 = fma(S0.f64, S1.f64, S2.f64) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if VCC.u64[laneId]: + D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) + else: + D0.f64 = fma(S0.f64, S1.f64, S2.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # tmp = S2.u32; + # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); + # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); + # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); + # tmp += S1.u32[31 : 24] == 8'0U ? 
0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) + tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) + tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) + tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.f16 * S1.f16 + S2.f16; + # if OPSEL.u4[3] then + # D0 = { tmp.f16, D0[15 : 0] } + # else + # D0 = { 16'0, tmp.f16 } + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.f16 * S1.f16 + S2.f16) + if OPSEL.u4[3]: + D0 = Reg(_pack(tmp.f16, D0[15 : 0])) + else: + D0 = Reg(_pack(0, tmp.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u16 * S1.u16 + S2.u16; + # if OPSEL.u4[3] then + # D0 = { tmp.u16, D0[15 : 0] } + # else + # D0 = { 16'0, tmp.u16 } + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.u16 * S1.u16 + S2.u16) + if OPSEL.u4[3]: + D0 = Reg(_pack(tmp.u16, D0[15 : 0])) + else: + D0 = Reg(_pack(0, tmp.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.i16 * S1.i16 + S2.i16; + # if OPSEL.u4[3] then + # D0 = { tmp.i16, D0[15 : 0] } + # else + # D0 = { 16'0, tmp.i16 } + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.i16 * S1.i16 + S2.i16) + if OPSEL.u4[3]: + D0 = Reg(_pack(tmp.i16, D0[15 : 0])) + else: + D0 = Reg(_pack(0, tmp.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = fma(S0.f16, S1.f16, S2.f16); + # if OPSEL.u4[3] then + # D0 = { tmp.f16, D0[15 : 0] } + # else + # D0 = { 16'0, tmp.f16 } + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(fma(S0.f16, S1.f16, S2.f16)) + if OPSEL.u4[3]: + D0 = Reg(_pack(tmp.f16, D0[15 : 0])) + else: + D0 = Reg(_pack(0, tmp.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # sign_out = (sign(S1.f16) ^ sign(S2.f16)); + # if isNAN(64'F(S2.f16)) then + # tmp = cvtToQuietNAN(64'F(S2.f16)) + # elsif isNAN(64'F(S1.f16)) then + # tmp = cvtToQuietNAN(64'F(S1.f16)) + # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then + # // 0/0 + # tmp = 16'F(0xfe00) + # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then + # // inf/inf + # tmp = 16'F(0xfe00) + # 
elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then + # // x/0, or inf/y + # tmp = sign_out ? -INF : +INF + # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then + # // x/inf, 0/y + # tmp = sign_out ? -0.0 : 0.0 + # else + # tmp = sign_out ? -abs(S0.f16) : abs(S0.f16) + # endif; + # if OPSEL.u4[3] then + # D0 = { tmp.f16, D0[15 : 0] } + # else + # D0 = { 16'0, tmp.f16 } + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + sign_out = (sign(S1.f16) ^ sign(S2.f16)) + if isNAN(F(S2.f16)): + tmp = Reg(cvtToQuietNAN(F(S2.f16))) + elif isNAN(F(S1.f16)): + tmp = Reg(cvtToQuietNAN(F(S1.f16))) + elif ((F(S1.f16) == 0.0) and (F(S2.f16) == 0.0)): + tmp = Reg(F(0xfe00)) + elif ((F(abs(S1.f16)) == INF) and (F(abs(S2.f16)) == INF)): + tmp = Reg(F(0xfe00)) + elif ((F(S1.f16) == 0.0) or (F(abs(S2.f16)) == INF)): + tmp = Reg((((-INF)) if (sign_out) else (INF))) + elif ((F(abs(S1.f16)) == INF) or (F(S2.f16) == 0.0)): + tmp = Reg(((-0.0) if (sign_out) else (0.0))) + else: + tmp = Reg(((-abs(S0.f16)) if (sign_out) else (abs(S0.f16)))) + if OPSEL.u4[3]: + D0 = Reg(_pack(tmp.f16, D0[15 : 0])) + else: + D0 = Reg(_pack(0, tmp.f16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # byte = S1.u32[1 : 0]; + # bit = byte.u32 * 8U; + # D0.u32[bit + 7U : bit] = 32'U(f32_to_u8(S0.f32)) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + byte = S1.u32[1 : 0] + bit = byte.u32 * 8 + D0.u32[bit + 7U : bit] = (f32_to_u8(S0.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u16) * (S1.u16) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i16) * (S1.i16) + S2.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) + S0 = Reg(s0) + S1 = Reg(s1) + 
S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then + # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) + # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then + # D0.f16 = v_max_f16(S1.f16, S2.f16) + # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then + # D0.f16 = v_max_f16(S0.f16, S2.f16) + # else + # D0.f16 = v_max_f16(S0.f16, S1.f16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): + D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) + elif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16: + D0.f16 = v_max_f16(S1.f16, S2.f16) + elif v_max3_f16(S0.f16, S1.f16, S2.f16) == S1.f16: + D0.f16 = v_max_f16(S0.f16, S2.f16) + else: + D0.f16 = v_max_f16(S0.f16, S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then + # D0.i16 = v_max_i16(S1.i16, S2.i16) + # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then + # D0.i16 = v_max_i16(S0.i16, S2.i16) + # else + # D0.i16 = v_max_i16(S0.i16, S1.i16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: + D0.i16 = v_max_i16(S1.i16, S2.i16) + elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: + D0.i16 = v_max_i16(S0.i16, S2.i16) + else: + D0.i16 = v_max_i16(S0.i16, S1.i16) + # --- end 
pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then + # D0.u16 = v_max_u16(S1.u16, S2.u16) + # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then + # D0.u16 = v_max_u16(S0.u16, S2.u16) + # else + # D0.u16 = v_max_u16(S0.u16, S1.u16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: + D0.u16 = v_max_u16(S1.u16, S2.u16) + elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: + D0.u16 = v_max_u16(S0.u16, S2.u16) + else: + D0.u16 = v_max_u16(S0.u16, S1.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 + S1.u32 + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 + S1.u32 + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 & S1.u32) | S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32 | S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32 | S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * S1.f16 + S2.f16 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * S1.f16 + S2.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 * S1.u16 + S2.u16 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 * S1.u16 + S2.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 * S1.i16 + S2.i16 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = S0.i16 * S1.i16 + S2.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = fma(S0.f16, S1.f16, S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = fma(S0.f16, S1.f16, S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # sign_out = (sign(S1.f16) ^ sign(S2.f16)); + # if isNAN(64'F(S2.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then + # // 0/0 + # D0.f16 = 16'F(0xfe00) + # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then + # // inf/inf + # D0.f16 = 16'F(0xfe00) + # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then + # // x/0, or inf/y + # D0.f16 = sign_out ? -INF.f16 : +INF.f16 + # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then + # // x/inf, 0/y + # D0.f16 = sign_out ? -16'0.0 : 16'0.0 + # else + # D0.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + sign_out = (sign(S1.f16) ^ sign(S2.f16)) + if isNAN(F(S2.f16)): + D0.f16 = F(cvtToQuietNAN(F(S2.f16))) + elif isNAN(F(S1.f16)): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif ((F(S1.f16) == 0.0) and (F(S2.f16) == 0.0)): + D0.f16 = F(0xfe00) + elif ((F(abs(S1.f16)) == INF) and (F(abs(S2.f16)) == INF)): + D0.f16 = F(0xfe00) + elif ((F(S1.f16) == 0.0) or (F(abs(S2.f16)) == INF)): + D0.f16 = (((-INF).f16) if (sign_out) else (INF.f16)) + elif ((F(abs(S1.f16)) == INF) or (F(S2.f16) == 0.0)): + D0.f16 = ((-0.0) if (sign_out) else (0.0)) + else: + D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 16'0U; + # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 16'U(~S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 
16'U(~S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x4) != 0 ? 16'U(~S0.b16 & S1.b16 & ~S2.b16) : 16'0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x8) != 0 ? 16'U(~S0.b16 & S1.b16 & S2.b16) : 16'0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x10) != 0 ? 16'U(S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x20) != 0 ? 16'U(S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x40) != 0 ? 16'U(S0.b16 & S1.b16 & ~S2.b16) : 16'0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x80) != 0 ? 16'U(S0.b16 & S1.b16 & S2.b16) : 16'0U)); + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + tmp = Reg((tmp | (((~S0.b16 & ~S1.b16 & ~S2.b16)) if ((TTBL.b32 & 0x1) != 0) else (0)))) + tmp = Reg((tmp | (((~S0.b16 & ~S1.b16 & S2.b16)) if ((TTBL.b32 & 0x2) != 0) else (0)))) + tmp = Reg((tmp | (((~S0.b16 & S1.b16 & ~S2.b16)) if ((TTBL.b32 & 0x4) != 0) else (0)))) + tmp = Reg((tmp | (((~S0.b16 & S1.b16 & S2.b16)) if ((TTBL.b32 & 0x8) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b16 & ~S1.b16 & ~S2.b16)) if ((TTBL.b32 & 0x10) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b16 & ~S1.b16 & S2.b16)) if ((TTBL.b32 & 0x20) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b16 & S1.b16 & ~S2.b16)) if ((TTBL.b32 & 0x40) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b16 & S1.b16 & S2.b16)) if ((TTBL.b32 & 0x80) != 0) else (0)))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0U; + # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 32'U(~S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 32'U(~S0.b32 & ~S1.b32 & S2.b32) : 0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x4) != 0 ? 32'U(~S0.b32 & S1.b32 & ~S2.b32) : 0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x8) != 0 ? 32'U(~S0.b32 & S1.b32 & S2.b32) : 0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x10) != 0 ? 32'U(S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x20) != 0 ? 32'U(S0.b32 & ~S1.b32 & S2.b32) : 0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x40) != 0 ? 32'U(S0.b32 & S1.b32 & ~S2.b32) : 0U)); + # tmp = (tmp | (32'I(TTBL.b32 & 0x80) != 0 ? 32'U(S0.b32 & S1.b32 & S2.b32) : 0U)); + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + tmp = Reg((tmp | (((~S0.b32 & ~S1.b32 & ~S2.b32)) if ((TTBL.b32 & 0x1) != 0) else (0)))) + tmp = Reg((tmp | (((~S0.b32 & ~S1.b32 & S2.b32)) if ((TTBL.b32 & 0x2) != 0) else (0)))) + tmp = Reg((tmp | (((~S0.b32 & S1.b32 & ~S2.b32)) if ((TTBL.b32 & 0x4) != 0) else (0)))) + tmp = Reg((tmp | (((~S0.b32 & S1.b32 & S2.b32)) if ((TTBL.b32 & 0x8) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b32 & ~S1.b32 & ~S2.b32)) if ((TTBL.b32 & 0x10) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b32 & ~S1.b32 & S2.b32)) if ((TTBL.b32 & 0x20) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b32 & S1.b32 & ~S2.b32)) if ((TTBL.b32 & 0x40) != 0) else (0)))) + tmp = Reg((tmp | (((S0.b32 & S1.b32 & S2.b32)) if ((TTBL.b32 & 0x80) != 0) else (0))))
+ # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # tmp0 = f32_to_fp8_scale(S0.f32, scale.u8); + # tmp1 = f32_to_fp8_scale(S1.f32, scale.u8); + # dstword = OPSEL[3].i32 * 16; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + tmp0 = f32_to_fp8_scale(S0.f32, scale.u8) + tmp1 = f32_to_fp8_scale(S1.f32, scale.u8) + dstword = OPSEL[3].i32 * 16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # tmp0 = f32_to_bf8_scale(S0.f32, scale.u8); + # tmp1 = f32_to_bf8_scale(S1.f32, scale.u8); + # dstword = OPSEL[3].i32 * 16; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + tmp0 = f32_to_bf8_scale(S0.f32, scale.u8) + tmp1 = f32_to_bf8_scale(S1.f32, scale.u8) + dstword = OPSEL[3].i32 * 16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # tmp = f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + tmp = Reg(f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8)) + dstbyte = OPSEL[3 : 2].i32 * 8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # tmp = f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + tmp = Reg(f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8)) + dstbyte = OPSEL[3 : 2].i32 * 8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcword = OPSEL[0].i32 * 16; + # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; + # D0[31 : 0].f32 = tmp0; + # D0[63 : 32].f32 = tmp1 + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcword = OPSEL[0].i32 * 16 + src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 + D0[31 : 0].f32 = tmp0 + D0[63 : 32].f32 = tmp1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcword = OPSEL[0].i32 * 16; + # src =
VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; + # D0[31 : 0].f32 = tmp0; + # D0[63 : 32].f32 = tmp1 + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcword = OPSEL[0].i32 * 16 + src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 + D0[31 : 0].f32 = tmp0 + D0[63 : 32].f32 = tmp1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcbyte = OPSEL[1 : 0].i32 * 8; + # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; + # tmp = fp8_to_f32_scale(src, scale.u8); + S1 = Reg(s1) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcbyte = OPSEL[1 : 0].i32 * 8 + src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8 + tmp = Reg(fp8_to_f32_scale(src, scale.u8)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcbyte = OPSEL[1 : 0].i32 * 8; + # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; + # tmp = bf8_to_f32_scale(src, scale.u8); + S1 = Reg(s1) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcbyte = OPSEL[1 : 0].i32 * 8 + src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8 + tmp = Reg(bf8_to_f32_scale(src, scale.u8)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # tmp0 = f32_to_fp4_scale(S0.f32, scale.u8); + # tmp1 = f32_to_fp4_scale(S1.f32, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + tmp0 = f32_to_fp4_scale(S0.f32, scale.u8) + tmp1 = f32_to_fp4_scale(S1.f32, scale.u8) + dstbyte = OPSEL[3 : 2].i32 * 8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # randomVal = S1.u32; + # tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8); + # tmp1 = f32_to_fp4_sr_scale(S0[63 : 32].f32, randomVal, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + randomVal = S1.u32 + tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8) + tmp1 = f32_to_fp4_sr_scale(S0[63 : 32].f32, randomVal, scale.u8) + dstbyte = OPSEL[3 : 2].i32 * 8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcbyte = OPSEL[1 : 0].i32 * 8; + # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; + # D0[31 : 0].f32 = tmp0; + # D0[63 : 
32].f32 = tmp1 + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcbyte = OPSEL[1 : 0].i32 * 8 + src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 + D0[31 : 0].f32 = tmp0 + D0[63 : 32].f32 = tmp1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8); + # tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8); + # dstword = OPSEL[3].i32 * 16; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8) + tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8) + dstword = OPSEL[3].i32 * 16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8); + # tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8); + # dstword = OPSEL[3].i32 * 16; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8) + tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8) + dstword = OPSEL[3].i32 * 16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # tmp = f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + tmp = Reg(f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8)) + dstbyte = OPSEL[3 : 2].i32 * 8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # tmp = f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + tmp = Reg(f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8)) + dstbyte = OPSEL[3 : 2].i32 * 8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcword = OPSEL[0].i32 * 16; + # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; + # D0[15 : 0].f16 = tmp0; + # D0[31 : 16].f16 = tmp1 + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcword = OPSEL[0].i32 * 16 + src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 + D0[15 : 0].f16 = tmp0 + D0[31 : 16].f16 = tmp1 + # --- end 
pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcword = OPSEL[0].i32 * 16; + # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; + # D0[15 : 0].f16 = tmp0; + # D0[31 : 16].f16 = tmp1 + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcword = OPSEL[0].i32 * 16 + src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 + D0[15 : 0].f16 = tmp0 + D0[31 : 16].f16 = tmp1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcbyte = OPSEL[1 : 0].i32 * 8; + # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; + # tmp = fp8_to_f16_scale(src, scale.u8); + # // OPSEL[3] controls destination hi/lo + S1 = Reg(s1) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcbyte = OPSEL[1 : 0].i32 * 8 + src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8 + tmp = Reg(fp8_to_f16_scale(src, scale.u8)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcbyte = OPSEL[1 : 0].i32 * 8; + # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; + # tmp = bf8_to_f16_scale(src, scale.u8); + # // OPSEL[3] controls destination hi/lo + S1 = Reg(s1) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcbyte = OPSEL[1 : 0].i32 * 8 + src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8 + tmp = Reg(bf8_to_f16_scale(src, scale.u8)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8); + # tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8) + tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8) + dstbyte = OPSEL[3 : 2].i32 * 8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # randomVal = S1.u32; + # tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8); + # tmp1 = f16_to_fp4_sr_scale(S0[31 : 16].f16, randomVal, scale.u8); + # dstbyte = OPSEL[3 : 2].i32 * 8; + # // Other destination bits are preserved + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + randomVal = S1.u32 + tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8) + tmp1 = f16_to_fp4_sr_scale(S0[31 : 16].f16, randomVal, scale.u8) + dstbyte = OPSEL[3 : 2].i32 * 
8 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # srcbyte = OPSEL[1 : 0].i32 * 8; + # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; + # D0[15 : 0].f16 = tmp0; + # D0[31 : 16].f16 = tmp1 + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + srcbyte = OPSEL[1 : 0].i32 * 8 + src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 + D0[15 : 0].f16 = tmp0 + D0[31 : 16].f16 = tmp1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # declare tmp : 192'B; + # for pass in 0 : 15 do + # // Note that S0 and S1 inputs are interleaved in the packed result. + # tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8); + # tmp[dOffset + 11 : dOffset + 6].fp6 = f32_to_fp6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) + # endfor; + # D0[191 : 0] = tmp.b192 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + for pass in range(0, int(15)+1): + tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8) + tmp[dOffset + 11 : dOffset + 6].fp6 = f32_to_fp6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) + D0[191 : 0] = tmp.b192 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # declare tmp : 192'B; + # for pass in 0 : 15 do + # // Note that S0 and S1 inputs are interleaved in the packed result. 
+ # tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8); + # tmp[dOffset + 11 : dOffset + 6].bf6 = f32_to_bf6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) + # endfor; + # D0[191 : 0] = tmp.b192 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + for pass in range(0, int(15)+1): + tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8) + tmp[dOffset + 11 : dOffset + 6].bf6 = f32_to_bf6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) + D0[191 : 0] = tmp.b192 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # randomVal = S1.u32; + # declare tmp : 192'B; + # for pass in 0 : 31 do + # tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, + # endfor; + # D0[191 : 0] = tmp.b192 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + randomVal = S1.u32 + for pass in range(0, int(31)+1): + tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, endfor; D0[191 : 0] = tmp.b192 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # randomVal = S1.u32; + # declare tmp : 192'B; + # for pass in 0 : 31 do + # tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, + # endfor; + # D0[191 : 0] = tmp.b192 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + randomVal = S1.u32 + for pass in range(0, int(31)+1): + tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, endfor; D0[191 : 0] = tmp.b192 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # declare tmp : 1024'B; + # for pass in 0 : 31 do + # tmp[dOffset + 31 : dOffset].f32 = fp6_to_f32_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) + # endfor; + # D0[1023 : 0] = tmp.b1024 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + for pass in range(0, int(31)+1): + tmp[dOffset + 31 : dOffset].f32 = fp6_to_f32_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) + D0[1023 : 0] = tmp.b1024 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # declare tmp : 1024'B; + # for pass in 0 : 31 do + # tmp[dOffset + 31 : dOffset].f32 = bf6_to_f32_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) + # endfor; + # D0[1023 : 0] = tmp.b1024 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + for pass in range(0, int(31)+1): + tmp[dOffset + 
31 : dOffset].f32 = bf6_to_f32_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) + D0[1023 : 0] = tmp.b1024 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # declare tmp : 192'B; + # for pass in 0 : 31 do + # tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_scale(S0[sOffset + 15 : sOffset].f16, scale.u8) + # endfor; + # D0[191 : 0] = tmp.b192 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + for pass in range(0, int(31)+1): + tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_scale(S0[sOffset + 15 : sOffset].f16, scale.u8) + D0[191 : 0] = tmp.b192 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # randomVal = S1.u32; + # declare tmp : 192'B; + # for pass in 0 : 31 do + # tmp[dOffset + 5 : dOffset].fp6 = f16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, + # endfor; + # D0[191 : 0] = tmp.b192 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + randomVal = S1.u32 + for pass in range(0, int(31)+1): + tmp[dOffset + 5 : dOffset].fp6 = f16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, endfor; D0[191 : 0] = tmp.b192 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S2.f32)); + # randomVal = S1.u32; + # declare tmp : 192'B; + # for pass in 0 : 31 do + # tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, + # endfor; + # D0[191 : 0] = tmp.b192 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S2.f32)) + randomVal = S1.u32 + for pass in range(0, int(31)+1): + tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, endfor; D0[191 : 0] = tmp.b192 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # declare tmp : 512'B; + # for pass in 0 : 31 do + # tmp[dOffset + 15 : dOffset].f16 = fp6_to_f16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) + # endfor; + # D0[511 : 0] = tmp.b512 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + for pass in range(0, int(31)+1): + tmp[dOffset + 15 : dOffset].f16 = fp6_to_f16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) + D0[511 : 0] = tmp.b512 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # scale = 32'U(exponent(S1.f32)); + # declare tmp : 512'B; + # for pass in 0 : 31 do + # tmp[dOffset + 15 : dOffset].f16 = bf6_to_f16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) + # endfor; + # D0[511 : 
0] = tmp.b512 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + scale = (exponent(S1.f32)) + for pass in range(0, int(31)+1): + tmp[dOffset + 15 : dOffset].f16 = bf6_to_f16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) + D0[511 : 0] = tmp.b512 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if n <= -128 then + # elsif n >= 127 then + # else + # endif); + # declare tmp : 16'B; + # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); + # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); + # D0[15 : 0] = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + if n <= -128: + pass + elif n >= 127: + pass + else: + pass + tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) + tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) + D0[15 : 0] = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if n <= 0 then + # elsif n >= 255 then + # else + # endif); + # declare tmp : 16'B; + # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); + # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); + # D0[15 : 0] = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + if n <= 0: + pass + elif n >= 255: + pass + else: + pass + tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) + tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) + D0[15 : 0] = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # prev_mode = ROUND_MODE; + # tmp[15 : 0].f16 = f32_to_f16(S0.f32); + # tmp[31 : 16].f16 = f32_to_f16(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + prev_mode = ROUND_MODE + tmp[15 : 0].f16 = f32_to_f16(S0.f32) + tmp[31 : 16].f16 = f32_to_f16(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 + S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 + S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 * S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 * S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then + # D0.f64 = cvtToQuietNAN(S1.f64) + # elsif isNAN(S0.f64) then + # D0.f64 = S1.f64 + # elsif isNAN(S1.f64) then + # D0.f64 = S0.f64 + # elsif ((S0.f64 == +0.0) && (S1.f64 == -0.0)) then + # D0.f64 = S1.f64 + # elsif ((S0.f64 == -0.0) && (S1.f64 == +0.0)) then + # D0.f64 = S0.f64 + # else + # D0.f64 = S0.f64 < 
S1.f64 ? S0.f64 : S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)): + D0.f64 = cvtToQuietNAN(S0.f64) + elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)): + D0.f64 = cvtToQuietNAN(S1.f64) + elif isNAN(S0.f64): + D0.f64 = S1.f64 + elif isNAN(S1.f64): + D0.f64 = S0.f64 + elif ((S0.f64 == +0.0) and (S1.f64 == -0.0)): + D0.f64 = S1.f64 + elif ((S0.f64 == -0.0) and (S1.f64 == +0.0)): + D0.f64 = S0.f64 + else: + D0.f64 = ((S0.f64) if (S0.f64 < S1.f64) else (S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then + # D0.f64 = cvtToQuietNAN(S1.f64) + # elsif isNAN(S0.f64) then + # D0.f64 = S1.f64 + # elsif isNAN(S1.f64) then + # D0.f64 = S0.f64 + # elsif ((S0.f64 == +0.0) && (S1.f64 == -0.0)) then + # D0.f64 = S0.f64 + # elsif ((S0.f64 == -0.0) && (S1.f64 == +0.0)) then + # D0.f64 = S1.f64 + # elsif WAVE_MODE.IEEE then + # D0.f64 = S0.f64 >= S1.f64 ? S0.f64 : S1.f64 + # else + # D0.f64 = S0.f64 > S1.f64 ? S0.f64 : S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)): + D0.f64 = cvtToQuietNAN(S0.f64) + elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)): + D0.f64 = cvtToQuietNAN(S1.f64) + elif isNAN(S0.f64): + D0.f64 = S1.f64 + elif isNAN(S1.f64): + D0.f64 = S0.f64 + elif ((S0.f64 == +0.0) and (S1.f64 == -0.0)): + D0.f64 = S0.f64 + elif ((S0.f64 == -0.0) and (S1.f64 == +0.0)): + D0.f64 = S1.f64 + elif WAVE_MODE.IEEE: + D0.f64 = ((S0.f64) if (S0.f64 >= S1.f64) else (S1.f64)) + else: + D0.f64 = ((S0.f64) if (S0.f64 > S1.f64) else (S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 * 2.0 ** S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 * 2.0 ** S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 * S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 * S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u32) * (S1.u32)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (((S0.i32) * (S1.i32)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 * 2.0F ** S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 * 2.0 ** S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # lane = S1.u32[5 : 0]; + # // Lane select + # D0.b32 = VGPR[lane][SRC0.u32] + S1 = Reg(s1) + D0 = Reg(d0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + lane = S1.u32[5 : 0] + D0.b32 = VGPR[lane][SRC0.u32] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32; + # for i in 0 : 31 do + # tmp += S0[i].u32; + # // count i'th bit + # endfor; + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S1.u32) + for i in range(0, int(31)+1): + tmp += S0[i].u32 + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S1.u64 << S0[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (S1.u64 << S0[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S1.u64 >> S0[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (S1.u64 >> S0[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i64 = (S1.i64 >> S0[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i64 = (S1.i64 >> S0[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); + # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = f32_to_snorm(S0.f32) + tmp[31 : 16].i16 = f32_to_snorm(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); + # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); + S0 = Reg(s0) 
+ S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = f32_to_unorm(S0.f32) + tmp[31 : 16].u16 = f32_to_unorm(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # prev_mode = ROUND_MODE; + # tmp[15 : 0].f16 = f32_to_f16(S0.f32); + # tmp[31 : 16].f16 = f32_to_f16(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + prev_mode = ROUND_MODE + tmp[15 : 0].f16 = f32_to_f16(S0.f32) + tmp[31 : 16].f16 = f32_to_f16(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = u32_to_u16(S0.u32); + # tmp[31 : 16].u16 = u32_to_u16(S1.u32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = u32_to_u16(S0.u32) + tmp[31 : 16].u16 = u32_to_u16(S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = i32_to_i16(S0.i32); + # tmp[31 : 16].i16 = i32_to_i16(S1.i32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = i32_to_i16(S0.i32) + tmp[31 : 16].i16 = i32_to_i16(S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); + # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = f16_to_snorm(S0.f16) + tmp[31 : 16].i16 = f16_to_snorm(S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); + # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = f16_to_unorm(S0.f16) + tmp[31 : 16].u16 = f16_to_unorm(S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 + S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = S0.i32 + S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 - S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = S0.i32 - S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 + S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = S0.i16 + S1.i16 + # --- 
end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 - S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = S0.i16 - S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0[31 : 16].f16 = S1.f16; + # D0[15 : 0].f16 = S0.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0[31 : 16].f16 = S1.f16 + D0[15 : 0].f16 = S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then + # // DX9 rules, 0.0 * x = 0.0 + # D0.f32 = 0.0F + # else + # D0.f32 = S0.f32 * S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): + D0.f32 = 0.0 + else: + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 32'F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 32'F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +VOP3AOp_FUNCTIONS = { + VOP3AOp.V_CMP_CLASS_F32: _VOP3AOp_V_CMP_CLASS_F32, + VOP3AOp.V_CMPX_CLASS_F32: _VOP3AOp_V_CMPX_CLASS_F32, + VOP3AOp.V_CMP_CLASS_F64: _VOP3AOp_V_CMP_CLASS_F64, + VOP3AOp.V_CMPX_CLASS_F64: _VOP3AOp_V_CMPX_CLASS_F64, + VOP3AOp.V_CMP_CLASS_F16: _VOP3AOp_V_CMP_CLASS_F16, + VOP3AOp.V_CMPX_CLASS_F16: _VOP3AOp_V_CMPX_CLASS_F16, + VOP3AOp.V_CMP_F_F16: _VOP3AOp_V_CMP_F_F16, + VOP3AOp.V_CMP_LT_F16: _VOP3AOp_V_CMP_LT_F16, + VOP3AOp.V_CMP_EQ_F16: _VOP3AOp_V_CMP_EQ_F16, + VOP3AOp.V_CMP_LE_F16: _VOP3AOp_V_CMP_LE_F16, + VOP3AOp.V_CMP_GT_F16: _VOP3AOp_V_CMP_GT_F16, + VOP3AOp.V_CMP_LG_F16: _VOP3AOp_V_CMP_LG_F16, + VOP3AOp.V_CMP_GE_F16: _VOP3AOp_V_CMP_GE_F16, + VOP3AOp.V_CMP_O_F16: _VOP3AOp_V_CMP_O_F16, + VOP3AOp.V_CMP_U_F16: _VOP3AOp_V_CMP_U_F16, + VOP3AOp.V_CMP_NGE_F16: _VOP3AOp_V_CMP_NGE_F16, + VOP3AOp.V_CMP_NLG_F16: _VOP3AOp_V_CMP_NLG_F16, + VOP3AOp.V_CMP_NGT_F16: _VOP3AOp_V_CMP_NGT_F16, + VOP3AOp.V_CMP_NLE_F16: _VOP3AOp_V_CMP_NLE_F16, + VOP3AOp.V_CMP_NEQ_F16: _VOP3AOp_V_CMP_NEQ_F16, + VOP3AOp.V_CMP_NLT_F16: _VOP3AOp_V_CMP_NLT_F16, + VOP3AOp.V_CMP_TRU_F16: _VOP3AOp_V_CMP_TRU_F16, + VOP3AOp.V_CMPX_F_F16: _VOP3AOp_V_CMPX_F_F16, + VOP3AOp.V_CMPX_LT_F16: _VOP3AOp_V_CMPX_LT_F16, + VOP3AOp.V_CMPX_EQ_F16: _VOP3AOp_V_CMPX_EQ_F16, + VOP3AOp.V_CMPX_LE_F16: _VOP3AOp_V_CMPX_LE_F16, + 
VOP3AOp.V_CMPX_GT_F16: _VOP3AOp_V_CMPX_GT_F16, + VOP3AOp.V_CMPX_LG_F16: _VOP3AOp_V_CMPX_LG_F16, + VOP3AOp.V_CMPX_GE_F16: _VOP3AOp_V_CMPX_GE_F16, + VOP3AOp.V_CMPX_O_F16: _VOP3AOp_V_CMPX_O_F16, + VOP3AOp.V_CMPX_U_F16: _VOP3AOp_V_CMPX_U_F16, + VOP3AOp.V_CMPX_NGE_F16: _VOP3AOp_V_CMPX_NGE_F16, + VOP3AOp.V_CMPX_NLG_F16: _VOP3AOp_V_CMPX_NLG_F16, + VOP3AOp.V_CMPX_NGT_F16: _VOP3AOp_V_CMPX_NGT_F16, + VOP3AOp.V_CMPX_NLE_F16: _VOP3AOp_V_CMPX_NLE_F16, + VOP3AOp.V_CMPX_NEQ_F16: _VOP3AOp_V_CMPX_NEQ_F16, + VOP3AOp.V_CMPX_NLT_F16: _VOP3AOp_V_CMPX_NLT_F16, + VOP3AOp.V_CMPX_TRU_F16: _VOP3AOp_V_CMPX_TRU_F16, + VOP3AOp.V_CMP_F_F32: _VOP3AOp_V_CMP_F_F32, + VOP3AOp.V_CMP_LT_F32: _VOP3AOp_V_CMP_LT_F32, + VOP3AOp.V_CMP_EQ_F32: _VOP3AOp_V_CMP_EQ_F32, + VOP3AOp.V_CMP_LE_F32: _VOP3AOp_V_CMP_LE_F32, + VOP3AOp.V_CMP_GT_F32: _VOP3AOp_V_CMP_GT_F32, + VOP3AOp.V_CMP_LG_F32: _VOP3AOp_V_CMP_LG_F32, + VOP3AOp.V_CMP_GE_F32: _VOP3AOp_V_CMP_GE_F32, + VOP3AOp.V_CMP_O_F32: _VOP3AOp_V_CMP_O_F32, + VOP3AOp.V_CMP_U_F32: _VOP3AOp_V_CMP_U_F32, + VOP3AOp.V_CMP_NGE_F32: _VOP3AOp_V_CMP_NGE_F32, + VOP3AOp.V_CMP_NLG_F32: _VOP3AOp_V_CMP_NLG_F32, + VOP3AOp.V_CMP_NGT_F32: _VOP3AOp_V_CMP_NGT_F32, + VOP3AOp.V_CMP_NLE_F32: _VOP3AOp_V_CMP_NLE_F32, + VOP3AOp.V_CMP_NEQ_F32: _VOP3AOp_V_CMP_NEQ_F32, + VOP3AOp.V_CMP_NLT_F32: _VOP3AOp_V_CMP_NLT_F32, + VOP3AOp.V_CMP_TRU_F32: _VOP3AOp_V_CMP_TRU_F32, + VOP3AOp.V_CMPX_F_F32: _VOP3AOp_V_CMPX_F_F32, + VOP3AOp.V_CMPX_LT_F32: _VOP3AOp_V_CMPX_LT_F32, + VOP3AOp.V_CMPX_EQ_F32: _VOP3AOp_V_CMPX_EQ_F32, + VOP3AOp.V_CMPX_LE_F32: _VOP3AOp_V_CMPX_LE_F32, + VOP3AOp.V_CMPX_GT_F32: _VOP3AOp_V_CMPX_GT_F32, + VOP3AOp.V_CMPX_LG_F32: _VOP3AOp_V_CMPX_LG_F32, + VOP3AOp.V_CMPX_GE_F32: _VOP3AOp_V_CMPX_GE_F32, + VOP3AOp.V_CMPX_O_F32: _VOP3AOp_V_CMPX_O_F32, + VOP3AOp.V_CMPX_U_F32: _VOP3AOp_V_CMPX_U_F32, + VOP3AOp.V_CMPX_NGE_F32: _VOP3AOp_V_CMPX_NGE_F32, + VOP3AOp.V_CMPX_NLG_F32: _VOP3AOp_V_CMPX_NLG_F32, + VOP3AOp.V_CMPX_NGT_F32: _VOP3AOp_V_CMPX_NGT_F32, + VOP3AOp.V_CMPX_NLE_F32: _VOP3AOp_V_CMPX_NLE_F32, + VOP3AOp.V_CMPX_NEQ_F32: _VOP3AOp_V_CMPX_NEQ_F32, + VOP3AOp.V_CMPX_NLT_F32: _VOP3AOp_V_CMPX_NLT_F32, + VOP3AOp.V_CMPX_TRU_F32: _VOP3AOp_V_CMPX_TRU_F32, + VOP3AOp.V_CMP_F_F64: _VOP3AOp_V_CMP_F_F64, + VOP3AOp.V_CMP_LT_F64: _VOP3AOp_V_CMP_LT_F64, + VOP3AOp.V_CMP_EQ_F64: _VOP3AOp_V_CMP_EQ_F64, + VOP3AOp.V_CMP_LE_F64: _VOP3AOp_V_CMP_LE_F64, + VOP3AOp.V_CMP_GT_F64: _VOP3AOp_V_CMP_GT_F64, + VOP3AOp.V_CMP_LG_F64: _VOP3AOp_V_CMP_LG_F64, + VOP3AOp.V_CMP_GE_F64: _VOP3AOp_V_CMP_GE_F64, + VOP3AOp.V_CMP_O_F64: _VOP3AOp_V_CMP_O_F64, + VOP3AOp.V_CMP_U_F64: _VOP3AOp_V_CMP_U_F64, + VOP3AOp.V_CMP_NGE_F64: _VOP3AOp_V_CMP_NGE_F64, + VOP3AOp.V_CMP_NLG_F64: _VOP3AOp_V_CMP_NLG_F64, + VOP3AOp.V_CMP_NGT_F64: _VOP3AOp_V_CMP_NGT_F64, + VOP3AOp.V_CMP_NLE_F64: _VOP3AOp_V_CMP_NLE_F64, + VOP3AOp.V_CMP_NEQ_F64: _VOP3AOp_V_CMP_NEQ_F64, + VOP3AOp.V_CMP_NLT_F64: _VOP3AOp_V_CMP_NLT_F64, + VOP3AOp.V_CMP_TRU_F64: _VOP3AOp_V_CMP_TRU_F64, + VOP3AOp.V_CMPX_F_F64: _VOP3AOp_V_CMPX_F_F64, + VOP3AOp.V_CMPX_LT_F64: _VOP3AOp_V_CMPX_LT_F64, + VOP3AOp.V_CMPX_EQ_F64: _VOP3AOp_V_CMPX_EQ_F64, + VOP3AOp.V_CMPX_LE_F64: _VOP3AOp_V_CMPX_LE_F64, + VOP3AOp.V_CMPX_GT_F64: _VOP3AOp_V_CMPX_GT_F64, + VOP3AOp.V_CMPX_LG_F64: _VOP3AOp_V_CMPX_LG_F64, + VOP3AOp.V_CMPX_GE_F64: _VOP3AOp_V_CMPX_GE_F64, + VOP3AOp.V_CMPX_O_F64: _VOP3AOp_V_CMPX_O_F64, + VOP3AOp.V_CMPX_U_F64: _VOP3AOp_V_CMPX_U_F64, + VOP3AOp.V_CMPX_NGE_F64: _VOP3AOp_V_CMPX_NGE_F64, + VOP3AOp.V_CMPX_NLG_F64: _VOP3AOp_V_CMPX_NLG_F64, + VOP3AOp.V_CMPX_NGT_F64: _VOP3AOp_V_CMPX_NGT_F64, + VOP3AOp.V_CMPX_NLE_F64: _VOP3AOp_V_CMPX_NLE_F64, + 
VOP3AOp.V_CMPX_NEQ_F64: _VOP3AOp_V_CMPX_NEQ_F64, + VOP3AOp.V_CMPX_NLT_F64: _VOP3AOp_V_CMPX_NLT_F64, + VOP3AOp.V_CMPX_TRU_F64: _VOP3AOp_V_CMPX_TRU_F64, + VOP3AOp.V_CMP_F_I16: _VOP3AOp_V_CMP_F_I16, + VOP3AOp.V_CMP_LT_I16: _VOP3AOp_V_CMP_LT_I16, + VOP3AOp.V_CMP_EQ_I16: _VOP3AOp_V_CMP_EQ_I16, + VOP3AOp.V_CMP_LE_I16: _VOP3AOp_V_CMP_LE_I16, + VOP3AOp.V_CMP_GT_I16: _VOP3AOp_V_CMP_GT_I16, + VOP3AOp.V_CMP_NE_I16: _VOP3AOp_V_CMP_NE_I16, + VOP3AOp.V_CMP_GE_I16: _VOP3AOp_V_CMP_GE_I16, + VOP3AOp.V_CMP_T_I16: _VOP3AOp_V_CMP_T_I16, + VOP3AOp.V_CMP_F_U16: _VOP3AOp_V_CMP_F_U16, + VOP3AOp.V_CMP_LT_U16: _VOP3AOp_V_CMP_LT_U16, + VOP3AOp.V_CMP_EQ_U16: _VOP3AOp_V_CMP_EQ_U16, + VOP3AOp.V_CMP_LE_U16: _VOP3AOp_V_CMP_LE_U16, + VOP3AOp.V_CMP_GT_U16: _VOP3AOp_V_CMP_GT_U16, + VOP3AOp.V_CMP_NE_U16: _VOP3AOp_V_CMP_NE_U16, + VOP3AOp.V_CMP_GE_U16: _VOP3AOp_V_CMP_GE_U16, + VOP3AOp.V_CMP_T_U16: _VOP3AOp_V_CMP_T_U16, + VOP3AOp.V_CMPX_F_I16: _VOP3AOp_V_CMPX_F_I16, + VOP3AOp.V_CMPX_LT_I16: _VOP3AOp_V_CMPX_LT_I16, + VOP3AOp.V_CMPX_EQ_I16: _VOP3AOp_V_CMPX_EQ_I16, + VOP3AOp.V_CMPX_LE_I16: _VOP3AOp_V_CMPX_LE_I16, + VOP3AOp.V_CMPX_GT_I16: _VOP3AOp_V_CMPX_GT_I16, + VOP3AOp.V_CMPX_NE_I16: _VOP3AOp_V_CMPX_NE_I16, + VOP3AOp.V_CMPX_GE_I16: _VOP3AOp_V_CMPX_GE_I16, + VOP3AOp.V_CMPX_T_I16: _VOP3AOp_V_CMPX_T_I16, + VOP3AOp.V_CMPX_F_U16: _VOP3AOp_V_CMPX_F_U16, + VOP3AOp.V_CMPX_LT_U16: _VOP3AOp_V_CMPX_LT_U16, + VOP3AOp.V_CMPX_EQ_U16: _VOP3AOp_V_CMPX_EQ_U16, + VOP3AOp.V_CMPX_LE_U16: _VOP3AOp_V_CMPX_LE_U16, + VOP3AOp.V_CMPX_GT_U16: _VOP3AOp_V_CMPX_GT_U16, + VOP3AOp.V_CMPX_NE_U16: _VOP3AOp_V_CMPX_NE_U16, + VOP3AOp.V_CMPX_GE_U16: _VOP3AOp_V_CMPX_GE_U16, + VOP3AOp.V_CMPX_T_U16: _VOP3AOp_V_CMPX_T_U16, + VOP3AOp.V_CMP_F_I32: _VOP3AOp_V_CMP_F_I32, + VOP3AOp.V_CMP_LT_I32: _VOP3AOp_V_CMP_LT_I32, + VOP3AOp.V_CMP_EQ_I32: _VOP3AOp_V_CMP_EQ_I32, + VOP3AOp.V_CMP_LE_I32: _VOP3AOp_V_CMP_LE_I32, + VOP3AOp.V_CMP_GT_I32: _VOP3AOp_V_CMP_GT_I32, + VOP3AOp.V_CMP_NE_I32: _VOP3AOp_V_CMP_NE_I32, + VOP3AOp.V_CMP_GE_I32: _VOP3AOp_V_CMP_GE_I32, + VOP3AOp.V_CMP_T_I32: _VOP3AOp_V_CMP_T_I32, + VOP3AOp.V_CMP_F_U32: _VOP3AOp_V_CMP_F_U32, + VOP3AOp.V_CMP_LT_U32: _VOP3AOp_V_CMP_LT_U32, + VOP3AOp.V_CMP_EQ_U32: _VOP3AOp_V_CMP_EQ_U32, + VOP3AOp.V_CMP_LE_U32: _VOP3AOp_V_CMP_LE_U32, + VOP3AOp.V_CMP_GT_U32: _VOP3AOp_V_CMP_GT_U32, + VOP3AOp.V_CMP_NE_U32: _VOP3AOp_V_CMP_NE_U32, + VOP3AOp.V_CMP_GE_U32: _VOP3AOp_V_CMP_GE_U32, + VOP3AOp.V_CMP_T_U32: _VOP3AOp_V_CMP_T_U32, + VOP3AOp.V_CMPX_F_I32: _VOP3AOp_V_CMPX_F_I32, + VOP3AOp.V_CMPX_LT_I32: _VOP3AOp_V_CMPX_LT_I32, + VOP3AOp.V_CMPX_EQ_I32: _VOP3AOp_V_CMPX_EQ_I32, + VOP3AOp.V_CMPX_LE_I32: _VOP3AOp_V_CMPX_LE_I32, + VOP3AOp.V_CMPX_GT_I32: _VOP3AOp_V_CMPX_GT_I32, + VOP3AOp.V_CMPX_NE_I32: _VOP3AOp_V_CMPX_NE_I32, + VOP3AOp.V_CMPX_GE_I32: _VOP3AOp_V_CMPX_GE_I32, + VOP3AOp.V_CMPX_T_I32: _VOP3AOp_V_CMPX_T_I32, + VOP3AOp.V_CMPX_F_U32: _VOP3AOp_V_CMPX_F_U32, + VOP3AOp.V_CMPX_LT_U32: _VOP3AOp_V_CMPX_LT_U32, + VOP3AOp.V_CMPX_EQ_U32: _VOP3AOp_V_CMPX_EQ_U32, + VOP3AOp.V_CMPX_LE_U32: _VOP3AOp_V_CMPX_LE_U32, + VOP3AOp.V_CMPX_GT_U32: _VOP3AOp_V_CMPX_GT_U32, + VOP3AOp.V_CMPX_NE_U32: _VOP3AOp_V_CMPX_NE_U32, + VOP3AOp.V_CMPX_GE_U32: _VOP3AOp_V_CMPX_GE_U32, + VOP3AOp.V_CMPX_T_U32: _VOP3AOp_V_CMPX_T_U32, + VOP3AOp.V_CMP_F_I64: _VOP3AOp_V_CMP_F_I64, + VOP3AOp.V_CMP_LT_I64: _VOP3AOp_V_CMP_LT_I64, + VOP3AOp.V_CMP_EQ_I64: _VOP3AOp_V_CMP_EQ_I64, + VOP3AOp.V_CMP_LE_I64: _VOP3AOp_V_CMP_LE_I64, + VOP3AOp.V_CMP_GT_I64: _VOP3AOp_V_CMP_GT_I64, + VOP3AOp.V_CMP_NE_I64: _VOP3AOp_V_CMP_NE_I64, + VOP3AOp.V_CMP_GE_I64: _VOP3AOp_V_CMP_GE_I64, + VOP3AOp.V_CMP_T_I64: 
_VOP3AOp_V_CMP_T_I64, + VOP3AOp.V_CMP_F_U64: _VOP3AOp_V_CMP_F_U64, + VOP3AOp.V_CMP_LT_U64: _VOP3AOp_V_CMP_LT_U64, + VOP3AOp.V_CMP_EQ_U64: _VOP3AOp_V_CMP_EQ_U64, + VOP3AOp.V_CMP_LE_U64: _VOP3AOp_V_CMP_LE_U64, + VOP3AOp.V_CMP_GT_U64: _VOP3AOp_V_CMP_GT_U64, + VOP3AOp.V_CMP_NE_U64: _VOP3AOp_V_CMP_NE_U64, + VOP3AOp.V_CMP_GE_U64: _VOP3AOp_V_CMP_GE_U64, + VOP3AOp.V_CMP_T_U64: _VOP3AOp_V_CMP_T_U64, + VOP3AOp.V_CMPX_F_I64: _VOP3AOp_V_CMPX_F_I64, + VOP3AOp.V_CMPX_LT_I64: _VOP3AOp_V_CMPX_LT_I64, + VOP3AOp.V_CMPX_EQ_I64: _VOP3AOp_V_CMPX_EQ_I64, + VOP3AOp.V_CMPX_LE_I64: _VOP3AOp_V_CMPX_LE_I64, + VOP3AOp.V_CMPX_GT_I64: _VOP3AOp_V_CMPX_GT_I64, + VOP3AOp.V_CMPX_NE_I64: _VOP3AOp_V_CMPX_NE_I64, + VOP3AOp.V_CMPX_GE_I64: _VOP3AOp_V_CMPX_GE_I64, + VOP3AOp.V_CMPX_T_I64: _VOP3AOp_V_CMPX_T_I64, + VOP3AOp.V_CMPX_F_U64: _VOP3AOp_V_CMPX_F_U64, + VOP3AOp.V_CMPX_LT_U64: _VOP3AOp_V_CMPX_LT_U64, + VOP3AOp.V_CMPX_EQ_U64: _VOP3AOp_V_CMPX_EQ_U64, + VOP3AOp.V_CMPX_LE_U64: _VOP3AOp_V_CMPX_LE_U64, + VOP3AOp.V_CMPX_GT_U64: _VOP3AOp_V_CMPX_GT_U64, + VOP3AOp.V_CMPX_NE_U64: _VOP3AOp_V_CMPX_NE_U64, + VOP3AOp.V_CMPX_GE_U64: _VOP3AOp_V_CMPX_GE_U64, + VOP3AOp.V_CMPX_T_U64: _VOP3AOp_V_CMPX_T_U64, + VOP3AOp.V_MOV_B32: _VOP3AOp_V_MOV_B32, + VOP3AOp.V_READFIRSTLANE_B32: _VOP3AOp_V_READFIRSTLANE_B32, + VOP3AOp.V_CVT_I32_F64: _VOP3AOp_V_CVT_I32_F64, + VOP3AOp.V_CVT_F64_I32: _VOP3AOp_V_CVT_F64_I32, + VOP3AOp.V_CVT_F32_I32: _VOP3AOp_V_CVT_F32_I32, + VOP3AOp.V_CVT_F32_U32: _VOP3AOp_V_CVT_F32_U32, + VOP3AOp.V_CVT_U32_F32: _VOP3AOp_V_CVT_U32_F32, + VOP3AOp.V_CVT_I32_F32: _VOP3AOp_V_CVT_I32_F32, + VOP3AOp.V_CVT_F16_F32: _VOP3AOp_V_CVT_F16_F32, + VOP3AOp.V_CVT_F32_F16: _VOP3AOp_V_CVT_F32_F16, + VOP3AOp.V_CVT_RPI_I32_F32: _VOP3AOp_V_CVT_RPI_I32_F32, + VOP3AOp.V_CVT_FLR_I32_F32: _VOP3AOp_V_CVT_FLR_I32_F32, + VOP3AOp.V_CVT_F32_F64: _VOP3AOp_V_CVT_F32_F64, + VOP3AOp.V_CVT_F64_F32: _VOP3AOp_V_CVT_F64_F32, + VOP3AOp.V_CVT_F32_UBYTE0: _VOP3AOp_V_CVT_F32_UBYTE0, + VOP3AOp.V_CVT_F32_UBYTE1: _VOP3AOp_V_CVT_F32_UBYTE1, + VOP3AOp.V_CVT_F32_UBYTE2: _VOP3AOp_V_CVT_F32_UBYTE2, + VOP3AOp.V_CVT_F32_UBYTE3: _VOP3AOp_V_CVT_F32_UBYTE3, + VOP3AOp.V_CVT_U32_F64: _VOP3AOp_V_CVT_U32_F64, + VOP3AOp.V_CVT_F64_U32: _VOP3AOp_V_CVT_F64_U32, + VOP3AOp.V_TRUNC_F64: _VOP3AOp_V_TRUNC_F64, + VOP3AOp.V_CEIL_F64: _VOP3AOp_V_CEIL_F64, + VOP3AOp.V_RNDNE_F64: _VOP3AOp_V_RNDNE_F64, + VOP3AOp.V_FLOOR_F64: _VOP3AOp_V_FLOOR_F64, + VOP3AOp.V_FRACT_F32: _VOP3AOp_V_FRACT_F32, + VOP3AOp.V_TRUNC_F32: _VOP3AOp_V_TRUNC_F32, + VOP3AOp.V_CEIL_F32: _VOP3AOp_V_CEIL_F32, + VOP3AOp.V_RNDNE_F32: _VOP3AOp_V_RNDNE_F32, + VOP3AOp.V_FLOOR_F32: _VOP3AOp_V_FLOOR_F32, + VOP3AOp.V_EXP_F32: _VOP3AOp_V_EXP_F32, + VOP3AOp.V_LOG_F32: _VOP3AOp_V_LOG_F32, + VOP3AOp.V_RCP_F32: _VOP3AOp_V_RCP_F32, + VOP3AOp.V_RCP_IFLAG_F32: _VOP3AOp_V_RCP_IFLAG_F32, + VOP3AOp.V_RSQ_F32: _VOP3AOp_V_RSQ_F32, + VOP3AOp.V_RCP_F64: _VOP3AOp_V_RCP_F64, + VOP3AOp.V_RSQ_F64: _VOP3AOp_V_RSQ_F64, + VOP3AOp.V_SQRT_F32: _VOP3AOp_V_SQRT_F32, + VOP3AOp.V_SQRT_F64: _VOP3AOp_V_SQRT_F64, + VOP3AOp.V_SIN_F32: _VOP3AOp_V_SIN_F32, + VOP3AOp.V_COS_F32: _VOP3AOp_V_COS_F32, + VOP3AOp.V_NOT_B32: _VOP3AOp_V_NOT_B32, + VOP3AOp.V_BFREV_B32: _VOP3AOp_V_BFREV_B32, + VOP3AOp.V_FFBH_U32: _VOP3AOp_V_FFBH_U32, + VOP3AOp.V_FFBL_B32: _VOP3AOp_V_FFBL_B32, + VOP3AOp.V_FFBH_I32: _VOP3AOp_V_FFBH_I32, + VOP3AOp.V_FREXP_EXP_I32_F64: _VOP3AOp_V_FREXP_EXP_I32_F64, + VOP3AOp.V_FREXP_MANT_F64: _VOP3AOp_V_FREXP_MANT_F64, + VOP3AOp.V_FRACT_F64: _VOP3AOp_V_FRACT_F64, + VOP3AOp.V_FREXP_EXP_I32_F32: _VOP3AOp_V_FREXP_EXP_I32_F32, + VOP3AOp.V_FREXP_MANT_F32: _VOP3AOp_V_FREXP_MANT_F32, 
+ VOP3AOp.V_MOV_B64: _VOP3AOp_V_MOV_B64, + VOP3AOp.V_CVT_F16_U16: _VOP3AOp_V_CVT_F16_U16, + VOP3AOp.V_CVT_F16_I16: _VOP3AOp_V_CVT_F16_I16, + VOP3AOp.V_CVT_U16_F16: _VOP3AOp_V_CVT_U16_F16, + VOP3AOp.V_CVT_I16_F16: _VOP3AOp_V_CVT_I16_F16, + VOP3AOp.V_RCP_F16: _VOP3AOp_V_RCP_F16, + VOP3AOp.V_SQRT_F16: _VOP3AOp_V_SQRT_F16, + VOP3AOp.V_RSQ_F16: _VOP3AOp_V_RSQ_F16, + VOP3AOp.V_LOG_F16: _VOP3AOp_V_LOG_F16, + VOP3AOp.V_EXP_F16: _VOP3AOp_V_EXP_F16, + VOP3AOp.V_CNDMASK_B32: _VOP3AOp_V_CNDMASK_B32, + VOP3AOp.V_ADD_F32: _VOP3AOp_V_ADD_F32, + VOP3AOp.V_SUB_F32: _VOP3AOp_V_SUB_F32, + VOP3AOp.V_SUBREV_F32: _VOP3AOp_V_SUBREV_F32, + VOP3AOp.V_FMAC_F64: _VOP3AOp_V_FMAC_F64, + VOP3AOp.V_MUL_F32: _VOP3AOp_V_MUL_F32, + VOP3AOp.V_MUL_I32_I24: _VOP3AOp_V_MUL_I32_I24, + VOP3AOp.V_MUL_HI_I32_I24: _VOP3AOp_V_MUL_HI_I32_I24, + VOP3AOp.V_MUL_U32_U24: _VOP3AOp_V_MUL_U32_U24, + VOP3AOp.V_MUL_HI_U32_U24: _VOP3AOp_V_MUL_HI_U32_U24, + VOP3AOp.V_MIN_F32: _VOP3AOp_V_MIN_F32, + VOP3AOp.V_MAX_F32: _VOP3AOp_V_MAX_F32, + VOP3AOp.V_MIN_I32: _VOP3AOp_V_MIN_I32, + VOP3AOp.V_MAX_I32: _VOP3AOp_V_MAX_I32, + VOP3AOp.V_MIN_U32: _VOP3AOp_V_MIN_U32, + VOP3AOp.V_MAX_U32: _VOP3AOp_V_MAX_U32, + VOP3AOp.V_LSHRREV_B32: _VOP3AOp_V_LSHRREV_B32, + VOP3AOp.V_ASHRREV_I32: _VOP3AOp_V_ASHRREV_I32, + VOP3AOp.V_LSHLREV_B32: _VOP3AOp_V_LSHLREV_B32, + VOP3AOp.V_AND_B32: _VOP3AOp_V_AND_B32, + VOP3AOp.V_OR_B32: _VOP3AOp_V_OR_B32, + VOP3AOp.V_XOR_B32: _VOP3AOp_V_XOR_B32, + VOP3AOp.V_ADD_F16: _VOP3AOp_V_ADD_F16, + VOP3AOp.V_SUB_F16: _VOP3AOp_V_SUB_F16, + VOP3AOp.V_SUBREV_F16: _VOP3AOp_V_SUBREV_F16, + VOP3AOp.V_MUL_F16: _VOP3AOp_V_MUL_F16, + VOP3AOp.V_MAC_F16: _VOP3AOp_V_MAC_F16, + VOP3AOp.V_ADD_U16: _VOP3AOp_V_ADD_U16, + VOP3AOp.V_SUB_U16: _VOP3AOp_V_SUB_U16, + VOP3AOp.V_SUBREV_U16: _VOP3AOp_V_SUBREV_U16, + VOP3AOp.V_MUL_LO_U16: _VOP3AOp_V_MUL_LO_U16, + VOP3AOp.V_LSHLREV_B16: _VOP3AOp_V_LSHLREV_B16, + VOP3AOp.V_LSHRREV_B16: _VOP3AOp_V_LSHRREV_B16, + VOP3AOp.V_ASHRREV_I16: _VOP3AOp_V_ASHRREV_I16, + VOP3AOp.V_MAX_F16: _VOP3AOp_V_MAX_F16, + VOP3AOp.V_MIN_F16: _VOP3AOp_V_MIN_F16, + VOP3AOp.V_MAX_U16: _VOP3AOp_V_MAX_U16, + VOP3AOp.V_MAX_I16: _VOP3AOp_V_MAX_I16, + VOP3AOp.V_MIN_U16: _VOP3AOp_V_MIN_U16, + VOP3AOp.V_MIN_I16: _VOP3AOp_V_MIN_I16, + VOP3AOp.V_LDEXP_F16: _VOP3AOp_V_LDEXP_F16, + VOP3AOp.V_ADD_U32: _VOP3AOp_V_ADD_U32, + VOP3AOp.V_SUB_U32: _VOP3AOp_V_SUB_U32, + VOP3AOp.V_SUBREV_U32: _VOP3AOp_V_SUBREV_U32, + VOP3AOp.V_DOT2C_F32_F16: _VOP3AOp_V_DOT2C_F32_F16, + VOP3AOp.V_DOT2C_I32_I16: _VOP3AOp_V_DOT2C_I32_I16, + VOP3AOp.V_DOT4C_I32_I8: _VOP3AOp_V_DOT4C_I32_I8, + VOP3AOp.V_DOT8C_I32_I4: _VOP3AOp_V_DOT8C_I32_I4, + VOP3AOp.V_FMAC_F32: _VOP3AOp_V_FMAC_F32, + VOP3AOp.V_PK_FMAC_F16: _VOP3AOp_V_PK_FMAC_F16, + VOP3AOp.V_XNOR_B32: _VOP3AOp_V_XNOR_B32, + VOP3AOp.V_MAD_I32_I24: _VOP3AOp_V_MAD_I32_I24, + VOP3AOp.V_MAD_U32_U24: _VOP3AOp_V_MAD_U32_U24, + VOP3AOp.V_CUBEID_F32: _VOP3AOp_V_CUBEID_F32, + VOP3AOp.V_CUBESC_F32: _VOP3AOp_V_CUBESC_F32, + VOP3AOp.V_CUBETC_F32: _VOP3AOp_V_CUBETC_F32, + VOP3AOp.V_CUBEMA_F32: _VOP3AOp_V_CUBEMA_F32, + VOP3AOp.V_BFE_U32: _VOP3AOp_V_BFE_U32, + VOP3AOp.V_BFE_I32: _VOP3AOp_V_BFE_I32, + VOP3AOp.V_BFI_B32: _VOP3AOp_V_BFI_B32, + VOP3AOp.V_FMA_F32: _VOP3AOp_V_FMA_F32, + VOP3AOp.V_FMA_F64: _VOP3AOp_V_FMA_F64, + VOP3AOp.V_LERP_U8: _VOP3AOp_V_LERP_U8, + VOP3AOp.V_ALIGNBIT_B32: _VOP3AOp_V_ALIGNBIT_B32, + VOP3AOp.V_ALIGNBYTE_B32: _VOP3AOp_V_ALIGNBYTE_B32, + VOP3AOp.V_MIN3_F32: _VOP3AOp_V_MIN3_F32, + VOP3AOp.V_MIN3_I32: _VOP3AOp_V_MIN3_I32, + VOP3AOp.V_MIN3_U32: _VOP3AOp_V_MIN3_U32, + VOP3AOp.V_MAX3_F32: _VOP3AOp_V_MAX3_F32, + VOP3AOp.V_MAX3_I32: 
_VOP3AOp_V_MAX3_I32, + VOP3AOp.V_MAX3_U32: _VOP3AOp_V_MAX3_U32, + VOP3AOp.V_MED3_F32: _VOP3AOp_V_MED3_F32, + VOP3AOp.V_MED3_I32: _VOP3AOp_V_MED3_I32, + VOP3AOp.V_MED3_U32: _VOP3AOp_V_MED3_U32, + VOP3AOp.V_SAD_U8: _VOP3AOp_V_SAD_U8, + VOP3AOp.V_SAD_U16: _VOP3AOp_V_SAD_U16, + VOP3AOp.V_SAD_U32: _VOP3AOp_V_SAD_U32, + VOP3AOp.V_CVT_PK_U8_F32: _VOP3AOp_V_CVT_PK_U8_F32, + VOP3AOp.V_DIV_FIXUP_F32: _VOP3AOp_V_DIV_FIXUP_F32, + VOP3AOp.V_DIV_FIXUP_F64: _VOP3AOp_V_DIV_FIXUP_F64, + VOP3AOp.V_DIV_FMAS_F32: _VOP3AOp_V_DIV_FMAS_F32, + VOP3AOp.V_DIV_FMAS_F64: _VOP3AOp_V_DIV_FMAS_F64, + VOP3AOp.V_MSAD_U8: _VOP3AOp_V_MSAD_U8, + VOP3AOp.V_MAD_LEGACY_F16: _VOP3AOp_V_MAD_LEGACY_F16, + VOP3AOp.V_MAD_LEGACY_U16: _VOP3AOp_V_MAD_LEGACY_U16, + VOP3AOp.V_MAD_LEGACY_I16: _VOP3AOp_V_MAD_LEGACY_I16, + VOP3AOp.V_FMA_LEGACY_F16: _VOP3AOp_V_FMA_LEGACY_F16, + VOP3AOp.V_DIV_FIXUP_LEGACY_F16: _VOP3AOp_V_DIV_FIXUP_LEGACY_F16, + VOP3AOp.V_CVT_PKACCUM_U8_F32: _VOP3AOp_V_CVT_PKACCUM_U8_F32, + VOP3AOp.V_MAD_U32_U16: _VOP3AOp_V_MAD_U32_U16, + VOP3AOp.V_MAD_I32_I16: _VOP3AOp_V_MAD_I32_I16, + VOP3AOp.V_XAD_U32: _VOP3AOp_V_XAD_U32, + VOP3AOp.V_MIN3_F16: _VOP3AOp_V_MIN3_F16, + VOP3AOp.V_MIN3_I16: _VOP3AOp_V_MIN3_I16, + VOP3AOp.V_MIN3_U16: _VOP3AOp_V_MIN3_U16, + VOP3AOp.V_MAX3_F16: _VOP3AOp_V_MAX3_F16, + VOP3AOp.V_MAX3_I16: _VOP3AOp_V_MAX3_I16, + VOP3AOp.V_MAX3_U16: _VOP3AOp_V_MAX3_U16, + VOP3AOp.V_MED3_F16: _VOP3AOp_V_MED3_F16, + VOP3AOp.V_MED3_I16: _VOP3AOp_V_MED3_I16, + VOP3AOp.V_MED3_U16: _VOP3AOp_V_MED3_U16, + VOP3AOp.V_LSHL_ADD_U32: _VOP3AOp_V_LSHL_ADD_U32, + VOP3AOp.V_ADD_LSHL_U32: _VOP3AOp_V_ADD_LSHL_U32, + VOP3AOp.V_ADD3_U32: _VOP3AOp_V_ADD3_U32, + VOP3AOp.V_LSHL_OR_B32: _VOP3AOp_V_LSHL_OR_B32, + VOP3AOp.V_AND_OR_B32: _VOP3AOp_V_AND_OR_B32, + VOP3AOp.V_OR3_B32: _VOP3AOp_V_OR3_B32, + VOP3AOp.V_MAD_F16: _VOP3AOp_V_MAD_F16, + VOP3AOp.V_MAD_U16: _VOP3AOp_V_MAD_U16, + VOP3AOp.V_MAD_I16: _VOP3AOp_V_MAD_I16, + VOP3AOp.V_FMA_F16: _VOP3AOp_V_FMA_F16, + VOP3AOp.V_DIV_FIXUP_F16: _VOP3AOp_V_DIV_FIXUP_F16, + VOP3AOp.V_LSHL_ADD_U64: _VOP3AOp_V_LSHL_ADD_U64, + VOP3AOp.V_BITOP3_B16: _VOP3AOp_V_BITOP3_B16, + VOP3AOp.V_BITOP3_B32: _VOP3AOp_V_BITOP3_B32, + VOP3AOp.V_CVT_SCALEF32_PK_FP8_F32: _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32, + VOP3AOp.V_CVT_SCALEF32_PK_BF8_F32: _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32, + VOP3AOp.V_CVT_SCALEF32_SR_FP8_F32: _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32, + VOP3AOp.V_CVT_SCALEF32_SR_BF8_F32: _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32, + VOP3AOp.V_CVT_SCALEF32_PK_F32_FP8: _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8, + VOP3AOp.V_CVT_SCALEF32_PK_F32_BF8: _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8, + VOP3AOp.V_CVT_SCALEF32_F32_FP8: _VOP3AOp_V_CVT_SCALEF32_F32_FP8, + VOP3AOp.V_CVT_SCALEF32_F32_BF8: _VOP3AOp_V_CVT_SCALEF32_F32_BF8, + VOP3AOp.V_CVT_SCALEF32_PK_FP4_F32: _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32, + VOP3AOp.V_CVT_SCALEF32_SR_PK_FP4_F32: _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32, + VOP3AOp.V_CVT_SCALEF32_PK_F32_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4, + VOP3AOp.V_CVT_SCALEF32_PK_FP8_F16: _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16, + VOP3AOp.V_CVT_SCALEF32_PK_BF8_F16: _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16, + VOP3AOp.V_CVT_SCALEF32_SR_FP8_F16: _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16, + VOP3AOp.V_CVT_SCALEF32_SR_BF8_F16: _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16, + VOP3AOp.V_CVT_SCALEF32_PK_F16_FP8: _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8, + VOP3AOp.V_CVT_SCALEF32_PK_F16_BF8: _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8, + VOP3AOp.V_CVT_SCALEF32_F16_FP8: _VOP3AOp_V_CVT_SCALEF32_F16_FP8, + VOP3AOp.V_CVT_SCALEF32_F16_BF8: _VOP3AOp_V_CVT_SCALEF32_F16_BF8, + VOP3AOp.V_CVT_SCALEF32_PK_FP4_F16: 
_VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16, + VOP3AOp.V_CVT_SCALEF32_SR_PK_FP4_F16: _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16, + VOP3AOp.V_CVT_SCALEF32_PK_F16_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4, + VOP3AOp.V_CVT_SCALEF32_2XPK16_FP6_F32: _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32, + VOP3AOp.V_CVT_SCALEF32_2XPK16_BF6_F32: _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32, + VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_F32: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32, + VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F32: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32, + VOP3AOp.V_CVT_SCALEF32_PK32_F32_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6, + VOP3AOp.V_CVT_SCALEF32_PK32_F32_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6, + VOP3AOp.V_CVT_SCALEF32_PK32_BF6_F16: _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16, + VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_F16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16, + VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16, + VOP3AOp.V_CVT_SCALEF32_PK32_F16_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6, + VOP3AOp.V_CVT_SCALEF32_PK32_F16_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6, + VOP3AOp.V_ASHR_PK_I8_I32: _VOP3AOp_V_ASHR_PK_I8_I32, + VOP3AOp.V_ASHR_PK_U8_I32: _VOP3AOp_V_ASHR_PK_U8_I32, + VOP3AOp.V_CVT_PK_F16_F32: _VOP3AOp_V_CVT_PK_F16_F32, + VOP3AOp.V_ADD_F64: _VOP3AOp_V_ADD_F64, + VOP3AOp.V_MUL_F64: _VOP3AOp_V_MUL_F64, + VOP3AOp.V_MIN_F64: _VOP3AOp_V_MIN_F64, + VOP3AOp.V_MAX_F64: _VOP3AOp_V_MAX_F64, + VOP3AOp.V_LDEXP_F64: _VOP3AOp_V_LDEXP_F64, + VOP3AOp.V_MUL_LO_U32: _VOP3AOp_V_MUL_LO_U32, + VOP3AOp.V_MUL_HI_U32: _VOP3AOp_V_MUL_HI_U32, + VOP3AOp.V_MUL_HI_I32: _VOP3AOp_V_MUL_HI_I32, + VOP3AOp.V_LDEXP_F32: _VOP3AOp_V_LDEXP_F32, + VOP3AOp.V_READLANE_B32: _VOP3AOp_V_READLANE_B32, + VOP3AOp.V_BCNT_U32_B32: _VOP3AOp_V_BCNT_U32_B32, + VOP3AOp.V_LSHLREV_B64: _VOP3AOp_V_LSHLREV_B64, + VOP3AOp.V_LSHRREV_B64: _VOP3AOp_V_LSHRREV_B64, + VOP3AOp.V_ASHRREV_I64: _VOP3AOp_V_ASHRREV_I64, + VOP3AOp.V_BFM_B32: _VOP3AOp_V_BFM_B32, + VOP3AOp.V_CVT_PKNORM_I16_F32: _VOP3AOp_V_CVT_PKNORM_I16_F32, + VOP3AOp.V_CVT_PKNORM_U16_F32: _VOP3AOp_V_CVT_PKNORM_U16_F32, + VOP3AOp.V_CVT_PKRTZ_F16_F32: _VOP3AOp_V_CVT_PKRTZ_F16_F32, + VOP3AOp.V_CVT_PK_U16_U32: _VOP3AOp_V_CVT_PK_U16_U32, + VOP3AOp.V_CVT_PK_I16_I32: _VOP3AOp_V_CVT_PK_I16_I32, + VOP3AOp.V_CVT_PKNORM_I16_F16: _VOP3AOp_V_CVT_PKNORM_I16_F16, + VOP3AOp.V_CVT_PKNORM_U16_F16: _VOP3AOp_V_CVT_PKNORM_U16_F16, + VOP3AOp.V_ADD_I32: _VOP3AOp_V_ADD_I32, + VOP3AOp.V_SUB_I32: _VOP3AOp_V_SUB_I32, + VOP3AOp.V_ADD_I16: _VOP3AOp_V_ADD_I16, + VOP3AOp.V_SUB_I16: _VOP3AOp_V_SUB_I16, + VOP3AOp.V_PACK_B32_F16: _VOP3AOp_V_PACK_B32_F16, + VOP3AOp.V_MUL_LEGACY_F32: _VOP3AOp_V_MUL_LEGACY_F32, + VOP3AOp.V_MINIMUM3_F32: _VOP3AOp_V_MINIMUM3_F32, + VOP3AOp.V_MAXIMUM3_F32: _VOP3AOp_V_MAXIMUM3_F32, +} + +def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32); + # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. 
+ # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32)) + VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32; + # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32) + VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32 - S0.u32; + # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S1.u32 - S0.u32) + VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; + # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 
1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC = 0x0LL; + # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then + # D0.f32 = NAN.f32 + # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then + # // N/D near MAX_FLOAT_F32 + # VCC = 0x1LL; + # if S0.f32 == S1.f32 then + # // Only scale the denominator + # D0.f32 = ldexp(S0.f32, 64) + # endif + # elsif S1.f32 == DENORM.f32 then + # D0.f32 = ldexp(S0.f32, 64) + # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then + # VCC = 0x1LL; + # if S0.f32 == S1.f32 then + # // Only scale the denominator + # D0.f32 = ldexp(S0.f32, 64) + # endif + # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then + # D0.f32 = ldexp(S0.f32, -64) + # elsif S2.f32 / S1.f32 == DENORM.f32 then + # VCC = 0x1LL; + # if S0.f32 == S2.f32 then + # // Only scale the numerator + # D0.f32 = ldexp(S0.f32, 64) + # endif + # elsif exponent(S2.f32) <= 23 then + # // Numerator is tiny + # D0.f32 = ldexp(S0.f32, 64) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(s0) + VCC = Reg(vcc) + # --- compiled pseudocode --- + VCC = Reg(0x0) + if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): + VCC = Reg(0x1); D0.f32 = float("nan") + elif exponent(S2.f32) - exponent(S1.f32) >= 96: + VCC = Reg(0x1) + if S0.f32 == S1.f32: + D0.f32 = ldexp(S0.f32, 64) + elif False: + pass # denorm check moved to end + elif ((1.0 / F(S1.f32) == DENORM.f64) and (S2.f32 / S1.f32 == DENORM.f32)): + VCC = Reg(0x1) + if S0.f32 == S1.f32: + D0.f32 = ldexp(S0.f32, 64) + elif 1.0 / F(S1.f32) == DENORM.f64: + D0.f32 = ldexp(S0.f32, -64) + elif S2.f32 / S1.f32 == DENORM.f32: + VCC = Reg(0x1) + elif exponent(S2.f32) <= 23: + VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) + if S1.f32 == DENORM.f32: + D0.f32 = float("nan") + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC = 0x0LL; + # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then + # D0.f64 = NAN.f64 + # elsif exponent(S2.f64) - exponent(S1.f64) >= 768 then + # // N/D near MAX_FLOAT_F64 + # VCC = 0x1LL; + # if S0.f64 == S1.f64 then + # // Only scale the denominator + # D0.f64 = ldexp(S0.f64, 128) + # endif + # elsif S1.f64 == DENORM.f64 then + # D0.f64 = ldexp(S0.f64, 128) + # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then + # VCC = 0x1LL; + # if S0.f64 == S1.f64 then + # // Only scale the denominator + # D0.f64 = ldexp(S0.f64, 128) + # endif + # elsif 1.0 / S1.f64 == DENORM.f64 then + # D0.f64 = ldexp(S0.f64, -128) + # elsif S2.f64 / S1.f64 == DENORM.f64 then + # VCC = 0x1LL; + # if S0.f64 == S2.f64 then + # // Only scale the numerator + # D0.f64 = ldexp(S0.f64, 128) + # endif + # elsif exponent(S2.f64) <= 53 then + # // Numerator is tiny + # D0.f64 = ldexp(S0.f64, 128) + # endif + S0 = Reg(s0) + 
S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(s0) + VCC = Reg(vcc) + # --- compiled pseudocode --- + VCC = Reg(0x0) + if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): + VCC = Reg(0x1); D0.f64 = float("nan") + elif exponent(S2.f64) - exponent(S1.f64) >= 768: + VCC = Reg(0x1) + if S0.f64 == S1.f64: + D0.f64 = ldexp(S0.f64, 128) + elif False: + pass # denorm check moved to end + elif ((1.0 / S1.f64 == DENORM.f64) and (S2.f64 / S1.f64 == DENORM.f64)): + VCC = Reg(0x1) + if S0.f64 == S1.f64: + D0.f64 = ldexp(S0.f64, 128) + elif 1.0 / S1.f64 == DENORM.f64: + D0.f64 = ldexp(S0.f64, -128) + elif S2.f64 / S1.f64 == DENORM.f64: + VCC = Reg(0x1) + elif exponent(S2.f64) <= 53: + D0.f64 = ldexp(S0.f64, 128) + if S1.f64 == DENORM.f64: + D0.f64 = float("nan") + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + D1 = Reg(0) + # --- compiled pseudocode --- + _full = ((S0.u32) * (S1.u32) + (S2.u64)) + D0.u64 = int(_full) & 0xffffffffffffffff + D1 = Reg((int(_full) >> 64) & 1) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + result['d1'] = D1._val & 1 + return result + +def _VOP3BOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + D1 = Reg(0) + # --- compiled pseudocode --- + _full = ((S0.i32) * (S1.i32) + (S2.i64)) + D0.u64 = int(_full) & 0xffffffffffffffff + D1 = Reg((int(_full) >> 64) & 1) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + result['d1'] = D1._val & 1 + return result + +VOP3BOp_FUNCTIONS = { + VOP3BOp.V_ADD_CO_U32: _VOP3BOp_V_ADD_CO_U32, + VOP3BOp.V_SUB_CO_U32: _VOP3BOp_V_SUB_CO_U32, + VOP3BOp.V_SUBREV_CO_U32: _VOP3BOp_V_SUBREV_CO_U32, + VOP3BOp.V_ADDC_CO_U32: _VOP3BOp_V_ADDC_CO_U32, + VOP3BOp.V_SUBB_CO_U32: _VOP3BOp_V_SUBB_CO_U32, + VOP3BOp.V_SUBBREV_CO_U32: _VOP3BOp_V_SUBBREV_CO_U32, + VOP3BOp.V_DIV_SCALE_F32: _VOP3BOp_V_DIV_SCALE_F32, + VOP3BOp.V_DIV_SCALE_F64: _VOP3BOp_V_DIV_SCALE_F64, + VOP3BOp.V_MAD_U64_U32: _VOP3BOp_V_MAD_U64_U32, + VOP3BOp.V_MAD_I64_I32: _VOP3BOp_V_MAD_I64_I32, +} + +COMPILED_FUNCTIONS = { + SOP1Op: SOP1Op_FUNCTIONS, + SOP2Op: SOP2Op_FUNCTIONS, + SOPCOp: SOPCOp_FUNCTIONS, + SOPKOp: SOPKOp_FUNCTIONS, + SOPPOp: SOPPOp_FUNCTIONS, + VOP1Op: VOP1Op_FUNCTIONS, + VOP2Op: VOP2Op_FUNCTIONS, + VOP3POp: VOP3POp_FUNCTIONS, + VOP3AOp: VOP3AOp_FUNCTIONS, + VOP3BOp: VOP3BOp_FUNCTIONS, +} + +def get_compiled_functions(): return COMPILED_FUNCTIONS \ No newline at end of file diff --git a/extra/assembly/amd/autogen/cdna4/gen_pcode.py b/extra/assembly/amd/autogen/cdna4/gen_pcode.py deleted file mode 100644 index a039acb815..0000000000 --- a/extra/assembly/amd/autogen/cdna4/gen_pcode.py +++ /dev/null @@ -1,1630 +0,0 @@ -# autogenerated by pcode.py - do not edit -# to regenerate: python -m extra.assembly.amd.pcode --arch cdna4 -# ruff: noqa: E501,F405,F403 -# mypy: ignore-errors -from extra.assembly.amd.autogen.cdna4 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp -from extra.assembly.amd.pcode import * 
- -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # if SCC then - # D0.b32 = S0.b32 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- - if SCC: - D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result - -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # if SCC then - # D0.b64 = S0.b64 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- - if SCC: - D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result - -SOP1Op_FUNCTIONS = { - SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, - SOP1Op.S_MOV_B64: _SOP1Op_S_MOV_B64, - SOP1Op.S_CMOV_B32: _SOP1Op_S_CMOV_B32, - SOP1Op.S_CMOV_B64: _SOP1Op_S_CMOV_B64, -} - -def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg((S0.u32) + (S1.u32)) - SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) - D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result - -def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp = S0.u32 - S1.u32; - # SCC = S1.u32 > S0.u32 ? 
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(S0.u32 - S1.u32) - SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) - D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result - -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp = S0.i32 + S1.i32; - # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(S0.i32 + S1.i32) - SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) - D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result - -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp = S0.i32 - S1.i32; - # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(S0.i32 - S1.i32) - SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) - D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result - -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) - SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) - D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result - -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp = S0.u32 - S1.u32 - SCC.u32; - # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(S0.u32 - S1.u32 - SCC.u32) - SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) - D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result - -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -SOP2Op_FUNCTIONS = { - SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32, - SOP2Op.S_SUB_U32: _SOP2Op_S_SUB_U32, - SOP2Op.S_ADD_I32: _SOP2Op_S_ADD_I32, - SOP2Op.S_SUB_I32: _SOP2Op_S_SUB_I32, - SOP2Op.S_ADDC_U32: _SOP2Op_S_ADDC_U32, - SOP2Op.S_SUBB_U32: _SOP2Op_S_SUBB_U32, - SOP2Op.S_PACK_LL_B32_B16: _SOP2Op_S_PACK_LL_B32_B16, - SOP2Op.S_PACK_LH_B32_B16: _SOP2Op_S_PACK_LH_B32_B16, - SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, -} - -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.i32 == S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.i32 != S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.i32 > S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.i32 >= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.i32 < S1.i32) - # --- end 
pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.i32 <= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32 == S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32 != S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32 > S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32 >= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32 < S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32 <= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def 
_SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- - SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -SOPCOp_FUNCTIONS = { - SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, - SOPCOp.S_CMP_LG_I32: _SOPCOp_S_CMP_LG_I32, - SOPCOp.S_CMP_GT_I32: _SOPCOp_S_CMP_GT_I32, - SOPCOp.S_CMP_GE_I32: _SOPCOp_S_CMP_GE_I32, - SOPCOp.S_CMP_LT_I32: _SOPCOp_S_CMP_LT_I32, - SOPCOp.S_CMP_LE_I32: _SOPCOp_S_CMP_LE_I32, - SOPCOp.S_CMP_EQ_U32: _SOPCOp_S_CMP_EQ_U32, - SOPCOp.S_CMP_LG_U32: _SOPCOp_S_CMP_LG_U32, - SOPCOp.S_CMP_GT_U32: _SOPCOp_S_CMP_GT_U32, - SOPCOp.S_CMP_GE_U32: _SOPCOp_S_CMP_GE_U32, - SOPCOp.S_CMP_LT_U32: _SOPCOp_S_CMP_LT_U32, - SOPCOp.S_CMP_LE_U32: _SOPCOp_S_CMP_LE_U32, - SOPCOp.S_BITCMP0_B32: _SOPCOp_S_BITCMP0_B32, - SOPCOp.S_BITCMP1_B32: _SOPCOp_S_BITCMP1_B32, - SOPCOp.S_BITCMP0_B64: _SOPCOp_S_BITCMP0_B64, - SOPCOp.S_BITCMP1_B64: _SOPCOp_S_BITCMP1_B64, -} - -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -SOPKOp_FUNCTIONS = { - SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, -} - -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # for i in 0U : SIMM16.u16[3 : 0].u32 do - # endfor - SIMM16 = Reg(literal) - # --- compiled pseudocode --- - for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): - pass - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -SOPPOp_FUNCTIONS = { - SOPPOp.S_NOP: _SOPPOp_S_NOP, -} - -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare lane : 32'I; - # if EXEC == 0x0LL then - # lane = 0; - # // Force lane 0 if all lanes are disabled - # else - # lane = s_ff1_i32_b64(EXEC); - # // Lowest active lane - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) - SRC0 = Reg(src0_idx) - # --- compiled pseudocode --- - if EXEC == 0x0: - lane = 0 - else: - lane = s_ff1_i32_b64(EXEC) - D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result - -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - 
result['d0_64'] = True - return result - -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - 
S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = trunc(S0.f64) - if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): - D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = floor(S0.f64 + 0.5) - if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): - D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = trunc(S0.f64) - if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): - D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -VOP1Op_FUNCTIONS = { - VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, - VOP1Op.V_READFIRSTLANE_B32: _VOP1Op_V_READFIRSTLANE_B32, - VOP1Op.V_CVT_I32_F64: _VOP1Op_V_CVT_I32_F64, - VOP1Op.V_CVT_F64_I32: _VOP1Op_V_CVT_F64_I32, - VOP1Op.V_CVT_F32_I32: _VOP1Op_V_CVT_F32_I32, - VOP1Op.V_CVT_F32_U32: _VOP1Op_V_CVT_F32_U32, - VOP1Op.V_CVT_U32_F32: _VOP1Op_V_CVT_U32_F32, - VOP1Op.V_CVT_I32_F32: _VOP1Op_V_CVT_I32_F32, - VOP1Op.V_CVT_F16_F32: _VOP1Op_V_CVT_F16_F32, - VOP1Op.V_CVT_F32_F16: _VOP1Op_V_CVT_F32_F16, - 
VOP1Op.V_CVT_F32_F64: _VOP1Op_V_CVT_F32_F64, - VOP1Op.V_CVT_F64_F32: _VOP1Op_V_CVT_F64_F32, - VOP1Op.V_CVT_F32_UBYTE0: _VOP1Op_V_CVT_F32_UBYTE0, - VOP1Op.V_CVT_F32_UBYTE1: _VOP1Op_V_CVT_F32_UBYTE1, - VOP1Op.V_CVT_F32_UBYTE2: _VOP1Op_V_CVT_F32_UBYTE2, - VOP1Op.V_CVT_F32_UBYTE3: _VOP1Op_V_CVT_F32_UBYTE3, - VOP1Op.V_CVT_U32_F64: _VOP1Op_V_CVT_U32_F64, - VOP1Op.V_CVT_F64_U32: _VOP1Op_V_CVT_F64_U32, - VOP1Op.V_TRUNC_F64: _VOP1Op_V_TRUNC_F64, - VOP1Op.V_CEIL_F64: _VOP1Op_V_CEIL_F64, - VOP1Op.V_RNDNE_F64: _VOP1Op_V_RNDNE_F64, - VOP1Op.V_FLOOR_F64: _VOP1Op_V_FLOOR_F64, -} - -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) - D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -VOP2Op_FUNCTIONS = { - VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, -} - -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare lane : 32'I; - # if EXEC == 0x0LL then - # lane = 0; - # // Force lane 0 if all lanes are disabled - # else - # lane = s_ff1_i32_b64(EXEC); - # // Lowest active lane - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) - SRC0 = Reg(src0_idx) - # --- compiled pseudocode --- - if EXEC == 0x0: - lane = 0 - else: - lane = s_ff1_i32_b64(EXEC) - D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result - -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def 
_VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - 
D0 = Reg(d0) - # --- compiled pseudocode --- - D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = trunc(S0.f64) - if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): - D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = floor(S0.f64 + 0.5) - if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): - D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.f64 = trunc(S0.f64) - if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): - D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result - -VOP3Op_FUNCTIONS = { - VOP3Op.V_MOV_B32: _VOP3Op_V_MOV_B32, - VOP3Op.V_READFIRSTLANE_B32: _VOP3Op_V_READFIRSTLANE_B32, - VOP3Op.V_CVT_I32_F64: _VOP3Op_V_CVT_I32_F64, - VOP3Op.V_CVT_F64_I32: _VOP3Op_V_CVT_F64_I32, - VOP3Op.V_CVT_F32_I32: _VOP3Op_V_CVT_F32_I32, - VOP3Op.V_CVT_F32_U32: _VOP3Op_V_CVT_F32_U32, - VOP3Op.V_CVT_U32_F32: _VOP3Op_V_CVT_U32_F32, - VOP3Op.V_CVT_I32_F32: _VOP3Op_V_CVT_I32_F32, - VOP3Op.V_CVT_F16_F32: _VOP3Op_V_CVT_F16_F32, - VOP3Op.V_CVT_F32_F16: _VOP3Op_V_CVT_F32_F16, - VOP3Op.V_CVT_F32_F64: _VOP3Op_V_CVT_F32_F64, - VOP3Op.V_CVT_F64_F32: _VOP3Op_V_CVT_F64_F32, - VOP3Op.V_CVT_F32_UBYTE0: _VOP3Op_V_CVT_F32_UBYTE0, - VOP3Op.V_CVT_F32_UBYTE1: _VOP3Op_V_CVT_F32_UBYTE1, - VOP3Op.V_CVT_F32_UBYTE2: _VOP3Op_V_CVT_F32_UBYTE2, - VOP3Op.V_CVT_F32_UBYTE3: _VOP3Op_V_CVT_F32_UBYTE3, - VOP3Op.V_CVT_U32_F64: _VOP3Op_V_CVT_U32_F64, - VOP3Op.V_CVT_F64_U32: _VOP3Op_V_CVT_F64_U32, - VOP3Op.V_TRUNC_F64: _VOP3Op_V_TRUNC_F64, - VOP3Op.V_CEIL_F64: _VOP3Op_V_CEIL_F64, - VOP3Op.V_RNDNE_F64: _VOP3Op_V_RNDNE_F64, - VOP3Op.V_FLOOR_F64: _VOP3Op_V_FLOOR_F64, -} - -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 
32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 - tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 - tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 - D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 - tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 - tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) - tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) - D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) - tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) - D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # tmp[31 : 16].i16 = (S1[31 
: 16].i16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) - tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) - D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) - tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) - tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 - tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 - tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 
0].u16 - tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) - tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) - tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); - # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) - tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 - tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 - tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16) - tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16) - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16) - tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16) - D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -VOP3POp_FUNCTIONS = { - VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, - VOP3POp.V_PK_MUL_LO_U16: _VOP3POp_V_PK_MUL_LO_U16, - VOP3POp.V_PK_ADD_I16: _VOP3POp_V_PK_ADD_I16, - VOP3POp.V_PK_SUB_I16: _VOP3POp_V_PK_SUB_I16, - VOP3POp.V_PK_LSHLREV_B16: _VOP3POp_V_PK_LSHLREV_B16, - VOP3POp.V_PK_LSHRREV_B16: _VOP3POp_V_PK_LSHRREV_B16, - VOP3POp.V_PK_ASHRREV_I16: _VOP3POp_V_PK_ASHRREV_I16, - VOP3POp.V_PK_MAX_I16: _VOP3POp_V_PK_MAX_I16, - VOP3POp.V_PK_MIN_I16: _VOP3POp_V_PK_MIN_I16, - VOP3POp.V_PK_MAD_U16: _VOP3POp_V_PK_MAD_U16, - VOP3POp.V_PK_ADD_U16: _VOP3POp_V_PK_ADD_U16, - VOP3POp.V_PK_SUB_U16: _VOP3POp_V_PK_SUB_U16, - VOP3POp.V_PK_MAX_U16: _VOP3POp_V_PK_MAX_U16, - VOP3POp.V_PK_MIN_U16: _VOP3POp_V_PK_MIN_U16, - VOP3POp.V_PK_FMA_F16: _VOP3POp_V_PK_FMA_F16, - VOP3POp.V_PK_ADD_F16: _VOP3POp_V_PK_ADD_F16, - VOP3POp.V_PK_MUL_F16: _VOP3POp_V_PK_MUL_F16, - VOP3POp.V_PK_MIN_F16: _VOP3POp_V_PK_MIN_F16, - VOP3POp.V_PK_MAX_F16: _VOP3POp_V_PK_MAX_F16, -} - -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- - EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result - -VOPCOp_FUNCTIONS = { - VOPCOp.V_CMPX_LT_I16: _VOPCOp_V_CMPX_LT_I16, - VOPCOp.V_CMPX_EQ_I16: _VOPCOp_V_CMPX_EQ_I16, - VOPCOp.V_CMPX_LE_I16: _VOPCOp_V_CMPX_LE_I16, - VOPCOp.V_CMPX_GT_I16: _VOPCOp_V_CMPX_GT_I16, - VOPCOp.V_CMPX_NE_I16: _VOPCOp_V_CMPX_NE_I16, - VOPCOp.V_CMPX_GE_I16: _VOPCOp_V_CMPX_GE_I16, - VOPCOp.V_CMPX_LT_U16: _VOPCOp_V_CMPX_LT_U16, - VOPCOp.V_CMPX_EQ_U16: _VOPCOp_V_CMPX_EQ_U16, - VOPCOp.V_CMPX_LE_U16: _VOPCOp_V_CMPX_LE_U16, - VOPCOp.V_CMPX_GT_U16: _VOPCOp_V_CMPX_GT_U16, - VOPCOp.V_CMPX_NE_U16: _VOPCOp_V_CMPX_NE_U16, - VOPCOp.V_CMPX_GE_U16: _VOPCOp_V_CMPX_GE_U16, -} - - -# V_WRITELANE_B32: Write scalar to specific lane's VGPR (not in PDF pseudocode) -def _VOP3Op_V_WRITELANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - wr_lane = s1 & 0x1f # lane select (5 bits for wave32) - return {'d0': d0, 'scc': scc, 'vgpr_write': (wr_lane, vdst_idx, s0 & 0xffffffff)} -VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = _VOP3Op_V_WRITELANE_B32 - -COMPILED_FUNCTIONS = { - SOP1Op: SOP1Op_FUNCTIONS, - SOP2Op: SOP2Op_FUNCTIONS, - SOPCOp: SOPCOp_FUNCTIONS, - SOPKOp: SOPKOp_FUNCTIONS, - SOPPOp: SOPPOp_FUNCTIONS, - VOP1Op: VOP1Op_FUNCTIONS, - VOP2Op: VOP2Op_FUNCTIONS, - VOP3Op: VOP3Op_FUNCTIONS, - VOP3POp: VOP3POp_FUNCTIONS, - VOPCOp: VOPCOp_FUNCTIONS, -} - -def get_compiled_functions(): return COMPILED_FUNCTIONS \ No newline at end of file diff --git a/extra/assembly/amd/autogen/rdna3/__init__.py b/extra/assembly/amd/autogen/rdna3/__init__.py index 3c9955a9a5..65c8c928cd 100644 --- a/extra/assembly/amd/autogen/rdna3/__init__.py +++ b/extra/assembly/amd/autogen/rdna3/__init__.py @@ -1,7 +1,7 @@ -# autogenerated from AMD RDNA3.5 ISA PDF by lib.py - do not edit +# autogenerated from AMD RDNA3.5 ISA PDF by dsl.py - do not edit from enum import IntEnum from typing import Annotated -from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField +from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField import functools class SrcEnum(IntEnum): diff --git a/extra/assembly/amd/autogen/rdna4/__init__.py b/extra/assembly/amd/autogen/rdna4/__init__.py new file mode 100644 index 0000000000..898c789549 --- /dev/null +++ 
b/extra/assembly/amd/autogen/rdna4/__init__.py @@ -0,0 +1,3051 @@ +# autogenerated from AMD RDNA4 ISA PDF by dsl.py - do not edit +from enum import IntEnum +from typing import Annotated +from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField +import functools + +class SrcEnum(IntEnum): + VCC_LO = 106 + VCC_HI = 107 + NULL = 124 + M0 = 125 + EXEC_LO = 126 + EXEC_HI = 127 + ZERO = 128 + DPP8 = 233 + DPP8FI = 234 + SHARED_BASE = 235 + SHARED_LIMIT = 236 + PRIVATE_BASE = 237 + PRIVATE_LIMIT = 238 + POS_HALF = 240 + NEG_HALF = 241 + POS_ONE = 242 + NEG_ONE = 243 + POS_TWO = 244 + NEG_TWO = 245 + POS_FOUR = 246 + NEG_FOUR = 247 + INV_2PI = 248 + DPP16 = 250 + VCCZ = 251 + EXECZ = 252 + SCC = 253 + LDS_DIRECT = 254 + +class DSOp(IntEnum): + DS_ADD_U32 = 0 + DS_SUB_U32 = 1 + DS_RSUB_U32 = 2 + DS_INC_U32 = 3 + DS_DEC_U32 = 4 + DS_MIN_I32 = 5 + DS_MAX_I32 = 6 + DS_MIN_U32 = 7 + DS_MAX_U32 = 8 + DS_AND_B32 = 9 + DS_OR_B32 = 10 + DS_XOR_B32 = 11 + DS_MSKOR_B32 = 12 + DS_STORE_B32 = 13 + DS_STORE_2ADDR_B32 = 14 + DS_STORE_2ADDR_STRIDE64_B32 = 15 + DS_CMPSTORE_B32 = 16 + DS_MIN_NUM_F32 = 18 + DS_MAX_NUM_F32 = 19 + DS_NOP = 20 + DS_ADD_F32 = 21 + DS_STORE_B8 = 30 + DS_STORE_B16 = 31 + DS_ADD_RTN_U32 = 32 + DS_SUB_RTN_U32 = 33 + DS_RSUB_RTN_U32 = 34 + DS_INC_RTN_U32 = 35 + DS_DEC_RTN_U32 = 36 + DS_MIN_RTN_I32 = 37 + DS_MAX_RTN_I32 = 38 + DS_MIN_RTN_U32 = 39 + DS_MAX_RTN_U32 = 40 + DS_AND_RTN_B32 = 41 + DS_OR_RTN_B32 = 42 + DS_XOR_RTN_B32 = 43 + DS_MSKOR_RTN_B32 = 44 + DS_STOREXCHG_RTN_B32 = 45 + DS_STOREXCHG_2ADDR_RTN_B32 = 46 + DS_STOREXCHG_2ADDR_STRIDE64_RTN_B32 = 47 + DS_CMPSTORE_RTN_B32 = 48 + DS_MIN_NUM_RTN_F32 = 50 + DS_MAX_NUM_RTN_F32 = 51 + DS_SWIZZLE_B32 = 53 + DS_LOAD_B32 = 54 + DS_LOAD_2ADDR_B32 = 55 + DS_LOAD_2ADDR_STRIDE64_B32 = 56 + DS_LOAD_I8 = 57 + DS_LOAD_U8 = 58 + DS_LOAD_I16 = 59 + DS_LOAD_U16 = 60 + DS_CONSUME = 61 + DS_APPEND = 62 + DS_ADD_U64 = 64 + DS_SUB_U64 = 65 + DS_RSUB_U64 = 66 + DS_INC_U64 = 67 + DS_DEC_U64 = 68 + DS_MIN_I64 = 69 + DS_MAX_I64 = 70 + DS_MIN_U64 = 71 + DS_MAX_U64 = 72 + DS_AND_B64 = 73 + DS_OR_B64 = 74 + DS_XOR_B64 = 75 + DS_MSKOR_B64 = 76 + DS_STORE_B64 = 77 + DS_STORE_2ADDR_B64 = 78 + DS_STORE_2ADDR_STRIDE64_B64 = 79 + DS_CMPSTORE_B64 = 80 + DS_MIN_NUM_F64 = 82 + DS_MAX_NUM_F64 = 83 + DS_ADD_RTN_U64 = 96 + DS_SUB_RTN_U64 = 97 + DS_RSUB_RTN_U64 = 98 + DS_INC_RTN_U64 = 99 + DS_DEC_RTN_U64 = 100 + DS_MIN_RTN_I64 = 101 + DS_MAX_RTN_I64 = 102 + DS_MIN_RTN_U64 = 103 + DS_MAX_RTN_U64 = 104 + DS_AND_RTN_B64 = 105 + DS_OR_RTN_B64 = 106 + DS_XOR_RTN_B64 = 107 + DS_MSKOR_RTN_B64 = 108 + DS_STOREXCHG_RTN_B64 = 109 + DS_STOREXCHG_2ADDR_RTN_B64 = 110 + DS_STOREXCHG_2ADDR_STRIDE64_RTN_B64 = 111 + DS_CMPSTORE_RTN_B64 = 112 + DS_MIN_NUM_RTN_F64 = 114 + DS_MAX_NUM_RTN_F64 = 115 + DS_LOAD_B64 = 118 + DS_LOAD_2ADDR_B64 = 119 + DS_LOAD_2ADDR_STRIDE64_B64 = 120 + DS_ADD_RTN_F32 = 121 + DS_CONDXCHG32_RTN_B64 = 126 + DS_COND_SUB_U32 = 152 + DS_SUB_CLAMP_U32 = 153 + DS_PK_ADD_F16 = 154 + DS_PK_ADD_BF16 = 155 + DS_STORE_B8_D16_HI = 160 + DS_STORE_B16_D16_HI = 161 + DS_LOAD_U8_D16 = 162 + DS_LOAD_U8_D16_HI = 163 + DS_LOAD_I8_D16 = 164 + DS_LOAD_I8_D16_HI = 165 + DS_LOAD_U16_D16 = 166 + DS_LOAD_U16_D16_HI = 167 + DS_COND_SUB_RTN_U32 = 168 + DS_SUB_CLAMP_RTN_U32 = 169 + DS_PK_ADD_RTN_F16 = 170 + DS_PK_ADD_RTN_BF16 = 171 + DS_STORE_ADDTID_B32 = 176 + DS_LOAD_ADDTID_B32 = 177 + DS_PERMUTE_B32 = 178 + DS_BPERMUTE_B32 = 179 + DS_BPERMUTE_FI_B32 = 205 + DS_STORE_B96 = 222 + 
DS_STORE_B128 = 223 + DS_BVH_STACK_PUSH4_POP1_RTN_B32 = 224 + DS_BVH_STACK_PUSH8_POP1_RTN_B32 = 225 + DS_BVH_STACK_PUSH8_POP2_RTN_B64 = 226 + DS_LOAD_B96 = 254 + DS_LOAD_B128 = 255 + +class SMEMOp(IntEnum): + S_LOAD_B32 = 0 + S_LOAD_B64 = 1 + S_LOAD_B128 = 2 + S_LOAD_B256 = 3 + S_LOAD_B512 = 4 + S_LOAD_B96 = 5 + S_LOAD_I8 = 8 + S_LOAD_U8 = 9 + S_LOAD_I16 = 10 + S_LOAD_U16 = 11 + S_BUFFER_LOAD_B32 = 16 + S_BUFFER_LOAD_B64 = 17 + S_BUFFER_LOAD_B128 = 18 + S_BUFFER_LOAD_B256 = 19 + S_BUFFER_LOAD_B512 = 20 + S_BUFFER_LOAD_B96 = 21 + S_BUFFER_LOAD_I8 = 24 + S_BUFFER_LOAD_U8 = 25 + S_BUFFER_LOAD_I16 = 26 + S_BUFFER_LOAD_U16 = 27 + S_DCACHE_INV = 33 + S_PREFETCH_INST = 36 + S_PREFETCH_INST_PC_REL = 37 + S_PREFETCH_DATA = 38 + S_BUFFER_PREFETCH_DATA = 39 + S_PREFETCH_DATA_PC_REL = 40 + +class SOP1Op(IntEnum): + S_MOV_B32 = 0 + S_MOV_B64 = 1 + S_CMOV_B32 = 2 + S_CMOV_B64 = 3 + S_BREV_B32 = 4 + S_BREV_B64 = 5 + S_CTZ_I32_B32 = 8 + S_CTZ_I32_B64 = 9 + S_CLZ_I32_U32 = 10 + S_CLZ_I32_U64 = 11 + S_CLS_I32 = 12 + S_CLS_I32_I64 = 13 + S_SEXT_I32_I8 = 14 + S_SEXT_I32_I16 = 15 + S_BITSET0_B32 = 16 + S_BITSET0_B64 = 17 + S_BITSET1_B32 = 18 + S_BITSET1_B64 = 19 + S_BITREPLICATE_B64_B32 = 20 + S_ABS_I32 = 21 + S_BCNT0_I32_B32 = 22 + S_BCNT0_I32_B64 = 23 + S_BCNT1_I32_B32 = 24 + S_BCNT1_I32_B64 = 25 + S_QUADMASK_B32 = 26 + S_QUADMASK_B64 = 27 + S_WQM_B32 = 28 + S_WQM_B64 = 29 + S_NOT_B32 = 30 + S_NOT_B64 = 31 + S_AND_SAVEEXEC_B32 = 32 + S_AND_SAVEEXEC_B64 = 33 + S_OR_SAVEEXEC_B32 = 34 + S_OR_SAVEEXEC_B64 = 35 + S_XOR_SAVEEXEC_B32 = 36 + S_XOR_SAVEEXEC_B64 = 37 + S_NAND_SAVEEXEC_B32 = 38 + S_NAND_SAVEEXEC_B64 = 39 + S_NOR_SAVEEXEC_B32 = 40 + S_NOR_SAVEEXEC_B64 = 41 + S_XNOR_SAVEEXEC_B32 = 42 + S_XNOR_SAVEEXEC_B64 = 43 + S_AND_NOT0_SAVEEXEC_B32 = 44 + S_AND_NOT0_SAVEEXEC_B64 = 45 + S_OR_NOT0_SAVEEXEC_B32 = 46 + S_OR_NOT0_SAVEEXEC_B64 = 47 + S_AND_NOT1_SAVEEXEC_B32 = 48 + S_AND_NOT1_SAVEEXEC_B64 = 49 + S_OR_NOT1_SAVEEXEC_B32 = 50 + S_OR_NOT1_SAVEEXEC_B64 = 51 + S_AND_NOT0_WREXEC_B32 = 52 + S_AND_NOT0_WREXEC_B64 = 53 + S_AND_NOT1_WREXEC_B32 = 54 + S_AND_NOT1_WREXEC_B64 = 55 + S_MOVRELS_B32 = 64 + S_MOVRELS_B64 = 65 + S_MOVRELD_B32 = 66 + S_MOVRELD_B64 = 67 + S_MOVRELSD_2_B32 = 68 + S_GETPC_B64 = 71 + S_SETPC_B64 = 72 + S_SWAPPC_B64 = 73 + S_RFE_B64 = 74 + S_SENDMSG_RTN_B32 = 76 + S_SENDMSG_RTN_B64 = 77 + S_BARRIER_SIGNAL = 78 + S_BARRIER_SIGNAL_ISFIRST = 79 + S_GET_BARRIER_STATE = 80 + S_ALLOC_VGPR = 83 + S_SLEEP_VAR = 88 + S_CEIL_F32 = 96 + S_FLOOR_F32 = 97 + S_TRUNC_F32 = 98 + S_RNDNE_F32 = 99 + S_CVT_F32_I32 = 100 + S_CVT_F32_U32 = 101 + S_CVT_I32_F32 = 102 + S_CVT_U32_F32 = 103 + S_CVT_F16_F32 = 104 + S_CVT_F32_F16 = 105 + S_CVT_HI_F32_F16 = 106 + S_CEIL_F16 = 107 + S_FLOOR_F16 = 108 + S_TRUNC_F16 = 109 + S_RNDNE_F16 = 110 + +class SOP2Op(IntEnum): + S_ADD_CO_U32 = 0 + S_SUB_CO_U32 = 1 + S_ADD_CO_I32 = 2 + S_SUB_CO_I32 = 3 + S_ADD_CO_CI_U32 = 4 + S_SUB_CO_CI_U32 = 5 + S_ABSDIFF_I32 = 6 + S_LSHL_B32 = 8 + S_LSHL_B64 = 9 + S_LSHR_B32 = 10 + S_LSHR_B64 = 11 + S_ASHR_I32 = 12 + S_ASHR_I64 = 13 + S_LSHL1_ADD_U32 = 14 + S_LSHL2_ADD_U32 = 15 + S_LSHL3_ADD_U32 = 16 + S_LSHL4_ADD_U32 = 17 + S_MIN_I32 = 18 + S_MIN_U32 = 19 + S_MAX_I32 = 20 + S_MAX_U32 = 21 + S_AND_B32 = 22 + S_AND_B64 = 23 + S_OR_B32 = 24 + S_OR_B64 = 25 + S_XOR_B32 = 26 + S_XOR_B64 = 27 + S_NAND_B32 = 28 + S_NAND_B64 = 29 + S_NOR_B32 = 30 + S_NOR_B64 = 31 + S_XNOR_B32 = 32 + S_XNOR_B64 = 33 + S_AND_NOT1_B32 = 34 + S_AND_NOT1_B64 = 35 + S_OR_NOT1_B32 = 36 + S_OR_NOT1_B64 = 37 + S_BFE_U32 = 38 + S_BFE_I32 = 39 + S_BFE_U64 = 40 + S_BFE_I64 = 41 + S_BFM_B32 
= 42 + S_BFM_B64 = 43 + S_MUL_I32 = 44 + S_MUL_HI_U32 = 45 + S_MUL_HI_I32 = 46 + S_CSELECT_B32 = 48 + S_CSELECT_B64 = 49 + S_PACK_LL_B32_B16 = 50 + S_PACK_LH_B32_B16 = 51 + S_PACK_HH_B32_B16 = 52 + S_PACK_HL_B32_B16 = 53 + S_ADD_F32 = 64 + S_SUB_F32 = 65 + S_MIN_NUM_F32 = 66 + S_MAX_NUM_F32 = 67 + S_MUL_F32 = 68 + S_FMAAK_F32 = 69 + S_FMAMK_F32 = 70 + S_FMAC_F32 = 71 + S_CVT_PK_RTZ_F16_F32 = 72 + S_ADD_F16 = 73 + S_SUB_F16 = 74 + S_MIN_NUM_F16 = 75 + S_MAX_NUM_F16 = 76 + S_MUL_F16 = 77 + S_FMAC_F16 = 78 + S_MINIMUM_F32 = 79 + S_MAXIMUM_F32 = 80 + S_MINIMUM_F16 = 81 + S_MAXIMUM_F16 = 82 + S_ADD_NC_U64 = 83 + S_SUB_NC_U64 = 84 + S_MUL_U64 = 85 + +class SOPCOp(IntEnum): + S_CMP_EQ_I32 = 0 + S_CMP_LG_I32 = 1 + S_CMP_GT_I32 = 2 + S_CMP_GE_I32 = 3 + S_CMP_LT_I32 = 4 + S_CMP_LE_I32 = 5 + S_CMP_EQ_U32 = 6 + S_CMP_LG_U32 = 7 + S_CMP_GT_U32 = 8 + S_CMP_GE_U32 = 9 + S_CMP_LT_U32 = 10 + S_CMP_LE_U32 = 11 + S_BITCMP0_B32 = 12 + S_BITCMP1_B32 = 13 + S_BITCMP0_B64 = 14 + S_BITCMP1_B64 = 15 + S_CMP_EQ_U64 = 16 + S_CMP_LG_U64 = 17 + S_CMP_LT_F32 = 65 + S_CMP_EQ_F32 = 66 + S_CMP_LE_F32 = 67 + S_CMP_GT_F32 = 68 + S_CMP_LG_F32 = 69 + S_CMP_GE_F32 = 70 + S_CMP_O_F32 = 71 + S_CMP_U_F32 = 72 + S_CMP_NGE_F32 = 73 + S_CMP_NLG_F32 = 74 + S_CMP_NGT_F32 = 75 + S_CMP_NLE_F32 = 76 + S_CMP_NEQ_F32 = 77 + S_CMP_NLT_F32 = 78 + S_CMP_LT_F16 = 81 + S_CMP_EQ_F16 = 82 + S_CMP_LE_F16 = 83 + S_CMP_GT_F16 = 84 + S_CMP_LG_F16 = 85 + S_CMP_GE_F16 = 86 + S_CMP_O_F16 = 87 + S_CMP_U_F16 = 88 + S_CMP_NGE_F16 = 89 + S_CMP_NLG_F16 = 90 + S_CMP_NGT_F16 = 91 + S_CMP_NLE_F16 = 92 + S_CMP_NEQ_F16 = 93 + S_CMP_NLT_F16 = 94 + +class SOPKOp(IntEnum): + S_MOVK_I32 = 0 + S_VERSION = 1 + S_CMOVK_I32 = 2 + S_ADDK_CO_I32 = 15 + S_MULK_I32 = 16 + S_GETREG_B32 = 17 + S_SETREG_B32 = 18 + S_SETREG_IMM32_B32 = 19 + S_CALL_B64 = 20 + +class SOPPOp(IntEnum): + S_NOP = 0 + S_SETKILL = 1 + S_SETHALT = 2 + S_SLEEP = 3 + S_CLAUSE = 5 + S_DELAY_ALU = 7 + S_WAIT_ALU = 8 + S_WAITCNT = 9 + S_WAIT_IDLE = 10 + S_WAIT_EVENT = 11 + S_TRAP = 16 + S_ROUND_MODE = 17 + S_DENORM_MODE = 18 + S_BARRIER_WAIT = 20 + S_CODE_END = 31 + S_BRANCH = 32 + S_CBRANCH_SCC0 = 33 + S_CBRANCH_SCC1 = 34 + S_CBRANCH_VCCZ = 35 + S_CBRANCH_VCCNZ = 36 + S_CBRANCH_EXECZ = 37 + S_CBRANCH_EXECNZ = 38 + S_ENDPGM = 48 + S_ENDPGM_SAVED = 49 + S_WAKEUP = 52 + S_SETPRIO = 53 + S_SENDMSG = 54 + S_SENDMSGHALT = 55 + S_INCPERFLEVEL = 56 + S_DECPERFLEVEL = 57 + S_ICACHE_INV = 60 + S_WAIT_LOADCNT = 64 + S_WAIT_STORECNT = 65 + S_WAIT_SAMPLECNT = 66 + S_WAIT_BVHCNT = 67 + S_WAIT_EXPCNT = 68 + S_WAIT_DSCNT = 70 + S_WAIT_KMCNT = 71 + S_WAIT_LOADCNT_DSCNT = 72 + S_WAIT_STORECNT_DSCNT = 73 + +class VBUFFEROp(IntEnum): + BUFFER_LOAD_FORMAT_X = 0 + BUFFER_LOAD_FORMAT_XY = 1 + BUFFER_LOAD_FORMAT_XYZ = 2 + BUFFER_LOAD_FORMAT_XYZW = 3 + BUFFER_STORE_FORMAT_X = 4 + BUFFER_STORE_FORMAT_XY = 5 + BUFFER_STORE_FORMAT_XYZ = 6 + BUFFER_STORE_FORMAT_XYZW = 7 + BUFFER_LOAD_D16_FORMAT_X = 8 + BUFFER_LOAD_D16_FORMAT_XY = 9 + BUFFER_LOAD_D16_FORMAT_XYZ = 10 + BUFFER_LOAD_D16_FORMAT_XYZW = 11 + BUFFER_STORE_D16_FORMAT_X = 12 + BUFFER_STORE_D16_FORMAT_XY = 13 + BUFFER_STORE_D16_FORMAT_XYZ = 14 + BUFFER_STORE_D16_FORMAT_XYZW = 15 + BUFFER_LOAD_U8 = 16 + BUFFER_LOAD_I8 = 17 + BUFFER_LOAD_U16 = 18 + BUFFER_LOAD_I16 = 19 + BUFFER_LOAD_B32 = 20 + BUFFER_LOAD_B64 = 21 + BUFFER_LOAD_B96 = 22 + BUFFER_LOAD_B128 = 23 + BUFFER_STORE_B8 = 24 + BUFFER_STORE_B16 = 25 + BUFFER_STORE_B32 = 26 + BUFFER_STORE_B64 = 27 + BUFFER_STORE_B96 = 28 + BUFFER_STORE_B128 = 29 + BUFFER_LOAD_D16_U8 = 30 + BUFFER_LOAD_D16_I8 = 31 + BUFFER_LOAD_D16_B16 = 32 + 
BUFFER_LOAD_D16_HI_U8 = 33 + BUFFER_LOAD_D16_HI_I8 = 34 + BUFFER_LOAD_D16_HI_B16 = 35 + BUFFER_STORE_D16_HI_B8 = 36 + BUFFER_STORE_D16_HI_B16 = 37 + BUFFER_LOAD_D16_HI_FORMAT_X = 38 + BUFFER_STORE_D16_HI_FORMAT_X = 39 + BUFFER_ATOMIC_SWAP_B32 = 51 + BUFFER_ATOMIC_CMPSWAP_B32 = 52 + BUFFER_ATOMIC_ADD_U32 = 53 + BUFFER_ATOMIC_SUB_U32 = 54 + BUFFER_ATOMIC_SUB_CLAMP_U32 = 55 + BUFFER_ATOMIC_MIN_I32 = 56 + BUFFER_ATOMIC_MIN_U32 = 57 + BUFFER_ATOMIC_MAX_I32 = 58 + BUFFER_ATOMIC_MAX_U32 = 59 + BUFFER_ATOMIC_AND_B32 = 60 + BUFFER_ATOMIC_OR_B32 = 61 + BUFFER_ATOMIC_XOR_B32 = 62 + BUFFER_ATOMIC_INC_U32 = 63 + BUFFER_ATOMIC_DEC_U32 = 64 + BUFFER_ATOMIC_SWAP_B64 = 65 + BUFFER_ATOMIC_CMPSWAP_B64 = 66 + BUFFER_ATOMIC_ADD_U64 = 67 + BUFFER_ATOMIC_SUB_U64 = 68 + BUFFER_ATOMIC_MIN_I64 = 69 + BUFFER_ATOMIC_MIN_U64 = 70 + BUFFER_ATOMIC_MAX_I64 = 71 + BUFFER_ATOMIC_MAX_U64 = 72 + BUFFER_ATOMIC_AND_B64 = 73 + BUFFER_ATOMIC_OR_B64 = 74 + BUFFER_ATOMIC_XOR_B64 = 75 + BUFFER_ATOMIC_INC_U64 = 76 + BUFFER_ATOMIC_DEC_U64 = 77 + BUFFER_ATOMIC_COND_SUB_U32 = 80 + BUFFER_ATOMIC_MIN_NUM_F32 = 81 + BUFFER_ATOMIC_MAX_NUM_F32 = 82 + BUFFER_ATOMIC_ADD_F32 = 86 + BUFFER_ATOMIC_PK_ADD_F16 = 89 + BUFFER_ATOMIC_PK_ADD_BF16 = 90 + TBUFFER_LOAD_FORMAT_X = 128 + TBUFFER_LOAD_FORMAT_XY = 129 + TBUFFER_LOAD_FORMAT_XYZ = 130 + TBUFFER_LOAD_FORMAT_XYZW = 131 + TBUFFER_STORE_FORMAT_X = 132 + TBUFFER_STORE_FORMAT_XY = 133 + TBUFFER_STORE_FORMAT_XYZ = 134 + TBUFFER_STORE_FORMAT_XYZW = 135 + TBUFFER_LOAD_D16_FORMAT_X = 136 + TBUFFER_LOAD_D16_FORMAT_XY = 137 + TBUFFER_LOAD_D16_FORMAT_XYZ = 138 + TBUFFER_LOAD_D16_FORMAT_XYZW = 139 + TBUFFER_STORE_D16_FORMAT_X = 140 + TBUFFER_STORE_D16_FORMAT_XY = 141 + TBUFFER_STORE_D16_FORMAT_XYZ = 142 + TBUFFER_STORE_D16_FORMAT_XYZW = 143 + +class VDSDIROp(IntEnum): + DS_PARAM_LOAD = 0 + DS_DIRECT_LOAD = 1 + +class VFLATOp(IntEnum): + FLAT_LOAD_U8 = 16 + FLAT_LOAD_I8 = 17 + FLAT_LOAD_U16 = 18 + FLAT_LOAD_I16 = 19 + FLAT_LOAD_B32 = 20 + FLAT_LOAD_B64 = 21 + FLAT_LOAD_B96 = 22 + FLAT_LOAD_B128 = 23 + FLAT_STORE_B8 = 24 + FLAT_STORE_B16 = 25 + FLAT_STORE_B32 = 26 + FLAT_STORE_B64 = 27 + FLAT_STORE_B96 = 28 + FLAT_STORE_B128 = 29 + FLAT_LOAD_D16_U8 = 30 + FLAT_LOAD_D16_I8 = 31 + FLAT_LOAD_D16_B16 = 32 + FLAT_LOAD_D16_HI_U8 = 33 + FLAT_LOAD_D16_HI_I8 = 34 + FLAT_LOAD_D16_HI_B16 = 35 + FLAT_STORE_D16_HI_B8 = 36 + FLAT_STORE_D16_HI_B16 = 37 + FLAT_ATOMIC_SWAP_B32 = 51 + FLAT_ATOMIC_CMPSWAP_B32 = 52 + FLAT_ATOMIC_ADD_U32 = 53 + FLAT_ATOMIC_SUB_U32 = 54 + FLAT_ATOMIC_SUB_CLAMP_U32 = 55 + FLAT_ATOMIC_MIN_I32 = 56 + FLAT_ATOMIC_MIN_U32 = 57 + FLAT_ATOMIC_MAX_I32 = 58 + FLAT_ATOMIC_MAX_U32 = 59 + FLAT_ATOMIC_AND_B32 = 60 + FLAT_ATOMIC_OR_B32 = 61 + FLAT_ATOMIC_XOR_B32 = 62 + FLAT_ATOMIC_INC_U32 = 63 + FLAT_ATOMIC_DEC_U32 = 64 + FLAT_ATOMIC_SWAP_B64 = 65 + FLAT_ATOMIC_CMPSWAP_B64 = 66 + FLAT_ATOMIC_ADD_U64 = 67 + FLAT_ATOMIC_SUB_U64 = 68 + FLAT_ATOMIC_MIN_I64 = 69 + FLAT_ATOMIC_MIN_U64 = 70 + FLAT_ATOMIC_MAX_I64 = 71 + FLAT_ATOMIC_MAX_U64 = 72 + FLAT_ATOMIC_AND_B64 = 73 + FLAT_ATOMIC_OR_B64 = 74 + FLAT_ATOMIC_XOR_B64 = 75 + FLAT_ATOMIC_INC_U64 = 76 + FLAT_ATOMIC_DEC_U64 = 77 + FLAT_ATOMIC_COND_SUB_U32 = 80 + FLAT_ATOMIC_MIN_NUM_F32 = 81 + FLAT_ATOMIC_MAX_NUM_F32 = 82 + FLAT_ATOMIC_ADD_F32 = 86 + FLAT_ATOMIC_PK_ADD_F16 = 89 + FLAT_ATOMIC_PK_ADD_BF16 = 90 + +class VGLOBALOp(IntEnum): + GLOBAL_LOAD_U8 = 16 + GLOBAL_LOAD_I8 = 17 + GLOBAL_LOAD_U16 = 18 + GLOBAL_LOAD_I16 = 19 + GLOBAL_LOAD_B32 = 20 + GLOBAL_LOAD_B64 = 21 + GLOBAL_LOAD_B96 = 22 + GLOBAL_LOAD_B128 = 23 + GLOBAL_STORE_B8 = 24 + GLOBAL_STORE_B16 = 25 + 
GLOBAL_STORE_B32 = 26 + GLOBAL_STORE_B64 = 27 + GLOBAL_STORE_B96 = 28 + GLOBAL_STORE_B128 = 29 + GLOBAL_LOAD_D16_U8 = 30 + GLOBAL_LOAD_D16_I8 = 31 + GLOBAL_LOAD_D16_B16 = 32 + GLOBAL_LOAD_D16_HI_U8 = 33 + GLOBAL_LOAD_D16_HI_I8 = 34 + GLOBAL_LOAD_D16_HI_B16 = 35 + GLOBAL_STORE_D16_HI_B8 = 36 + GLOBAL_STORE_D16_HI_B16 = 37 + GLOBAL_LOAD_ADDTID_B32 = 40 + GLOBAL_STORE_ADDTID_B32 = 41 + GLOBAL_INV = 43 + GLOBAL_WB = 44 + GLOBAL_ATOMIC_SWAP_B32 = 51 + GLOBAL_ATOMIC_CMPSWAP_B32 = 52 + GLOBAL_ATOMIC_ADD_U32 = 53 + GLOBAL_ATOMIC_SUB_U32 = 54 + GLOBAL_ATOMIC_SUB_CLAMP_U32 = 55 + GLOBAL_ATOMIC_MIN_I32 = 56 + GLOBAL_ATOMIC_MIN_U32 = 57 + GLOBAL_ATOMIC_MAX_I32 = 58 + GLOBAL_ATOMIC_MAX_U32 = 59 + GLOBAL_ATOMIC_AND_B32 = 60 + GLOBAL_ATOMIC_OR_B32 = 61 + GLOBAL_ATOMIC_XOR_B32 = 62 + GLOBAL_ATOMIC_INC_U32 = 63 + GLOBAL_ATOMIC_DEC_U32 = 64 + GLOBAL_ATOMIC_SWAP_B64 = 65 + GLOBAL_ATOMIC_CMPSWAP_B64 = 66 + GLOBAL_ATOMIC_ADD_U64 = 67 + GLOBAL_ATOMIC_SUB_U64 = 68 + GLOBAL_ATOMIC_MIN_I64 = 69 + GLOBAL_ATOMIC_MIN_U64 = 70 + GLOBAL_ATOMIC_MAX_I64 = 71 + GLOBAL_ATOMIC_MAX_U64 = 72 + GLOBAL_ATOMIC_AND_B64 = 73 + GLOBAL_ATOMIC_OR_B64 = 74 + GLOBAL_ATOMIC_XOR_B64 = 75 + GLOBAL_ATOMIC_INC_U64 = 76 + GLOBAL_ATOMIC_DEC_U64 = 77 + GLOBAL_WBINV = 79 + GLOBAL_ATOMIC_COND_SUB_U32 = 80 + GLOBAL_ATOMIC_MIN_NUM_F32 = 81 + GLOBAL_ATOMIC_MAX_NUM_F32 = 82 + GLOBAL_LOAD_BLOCK = 83 + GLOBAL_STORE_BLOCK = 84 + GLOBAL_ATOMIC_ADD_F32 = 86 + GLOBAL_LOAD_TR_B128 = 87 + GLOBAL_LOAD_TR_B64 = 88 + GLOBAL_ATOMIC_PK_ADD_F16 = 89 + GLOBAL_ATOMIC_PK_ADD_BF16 = 90 + GLOBAL_ATOMIC_ORDERED_ADD_B64 = 115 + +class VIMAGEOp(IntEnum): + IMAGE_LOAD = 0 + IMAGE_LOAD_MIP = 1 + IMAGE_LOAD_PCK = 2 + IMAGE_LOAD_PCK_SGN = 3 + IMAGE_LOAD_MIP_PCK = 4 + IMAGE_LOAD_MIP_PCK_SGN = 5 + IMAGE_STORE = 6 + IMAGE_STORE_MIP = 7 + IMAGE_STORE_PCK = 8 + IMAGE_STORE_MIP_PCK = 9 + IMAGE_ATOMIC_SWAP = 10 + IMAGE_ATOMIC_CMPSWAP = 11 + IMAGE_ATOMIC_ADD_UINT = 12 + IMAGE_ATOMIC_SUB_UINT = 13 + IMAGE_ATOMIC_MIN_INT = 14 + IMAGE_ATOMIC_MIN_UINT = 15 + IMAGE_ATOMIC_MAX_INT = 16 + IMAGE_ATOMIC_MAX_UINT = 17 + IMAGE_ATOMIC_AND = 18 + IMAGE_ATOMIC_OR = 19 + IMAGE_ATOMIC_XOR = 20 + IMAGE_ATOMIC_INC_UINT = 21 + IMAGE_ATOMIC_DEC_UINT = 22 + IMAGE_GET_RESINFO = 23 + IMAGE_BVH_INTERSECT_RAY = 25 + IMAGE_BVH64_INTERSECT_RAY = 26 + IMAGE_BVH_DUAL_INTERSECT_RAY = 128 + IMAGE_BVH8_INTERSECT_RAY = 129 + IMAGE_ATOMIC_ADD_FLT = 131 + IMAGE_ATOMIC_MIN_FLT = 132 + IMAGE_ATOMIC_MAX_FLT = 133 + IMAGE_ATOMIC_PK_ADD_F16 = 134 + IMAGE_ATOMIC_PK_ADD_BF16 = 135 + +class VINTERPOp(IntEnum): + V_INTERP_P10_F32 = 0 + V_INTERP_P2_F32 = 1 + V_INTERP_P10_F16_F32 = 2 + V_INTERP_P2_F16_F32 = 3 + V_INTERP_P10_RTZ_F16_F32 = 4 + V_INTERP_P2_RTZ_F16_F32 = 5 + +class VOP1Op(IntEnum): + V_NOP = 0 + V_MOV_B32 = 1 + V_READFIRSTLANE_B32 = 2 + V_CVT_I32_F64 = 3 + V_CVT_F64_I32 = 4 + V_CVT_F32_I32 = 5 + V_CVT_F32_U32 = 6 + V_CVT_U32_F32 = 7 + V_CVT_I32_F32 = 8 + V_CVT_F16_F32 = 10 + V_CVT_F32_F16 = 11 + V_CVT_NEAREST_I32_F32 = 12 + V_CVT_FLOOR_I32_F32 = 13 + V_CVT_OFF_F32_I4 = 14 + V_CVT_F32_F64 = 15 + V_CVT_F64_F32 = 16 + V_CVT_F32_UBYTE0 = 17 + V_CVT_F32_UBYTE1 = 18 + V_CVT_F32_UBYTE2 = 19 + V_CVT_F32_UBYTE3 = 20 + V_CVT_U32_F64 = 21 + V_CVT_F64_U32 = 22 + V_TRUNC_F64 = 23 + V_CEIL_F64 = 24 + V_RNDNE_F64 = 25 + V_FLOOR_F64 = 26 + V_PIPEFLUSH = 27 + V_MOV_B16 = 28 + V_FRACT_F32 = 32 + V_TRUNC_F32 = 33 + V_CEIL_F32 = 34 + V_RNDNE_F32 = 35 + V_FLOOR_F32 = 36 + V_EXP_F32 = 37 + V_LOG_F32 = 39 + V_RCP_F32 = 42 + V_RCP_IFLAG_F32 = 43 + V_RSQ_F32 = 46 + V_RCP_F64 = 47 + V_RSQ_F64 = 49 + V_SQRT_F32 = 51 + V_SQRT_F64 = 52 + 
V_SIN_F32 = 53 + V_COS_F32 = 54 + V_NOT_B32 = 55 + V_BFREV_B32 = 56 + V_CLZ_I32_U32 = 57 + V_CTZ_I32_B32 = 58 + V_CLS_I32 = 59 + V_FREXP_EXP_I32_F64 = 60 + V_FREXP_MANT_F64 = 61 + V_FRACT_F64 = 62 + V_FREXP_EXP_I32_F32 = 63 + V_FREXP_MANT_F32 = 64 + V_MOVRELD_B32 = 66 + V_MOVRELS_B32 = 67 + V_MOVRELSD_B32 = 68 + V_MOVRELSD_2_B32 = 72 + V_CVT_F16_U16 = 80 + V_CVT_F16_I16 = 81 + V_CVT_U16_F16 = 82 + V_CVT_I16_F16 = 83 + V_RCP_F16 = 84 + V_SQRT_F16 = 85 + V_RSQ_F16 = 86 + V_LOG_F16 = 87 + V_EXP_F16 = 88 + V_FREXP_MANT_F16 = 89 + V_FREXP_EXP_I16_F16 = 90 + V_FLOOR_F16 = 91 + V_CEIL_F16 = 92 + V_TRUNC_F16 = 93 + V_RNDNE_F16 = 94 + V_FRACT_F16 = 95 + V_SIN_F16 = 96 + V_COS_F16 = 97 + V_SAT_PK_U8_I16 = 98 + V_CVT_NORM_I16_F16 = 99 + V_CVT_NORM_U16_F16 = 100 + V_SWAP_B32 = 101 + V_SWAP_B16 = 102 + V_PERMLANE64_B32 = 103 + V_SWAPREL_B32 = 104 + V_NOT_B16 = 105 + V_CVT_I32_I16 = 106 + V_CVT_U32_U16 = 107 + V_CVT_F32_FP8 = 108 + V_CVT_F32_BF8 = 109 + V_CVT_PK_F32_FP8 = 110 + V_CVT_PK_F32_BF8 = 111 + +class VOP2Op(IntEnum): + V_CNDMASK_B32 = 1 + V_ADD_F64 = 2 + V_ADD_F32 = 3 + V_SUB_F32 = 4 + V_SUBREV_F32 = 5 + V_MUL_F64 = 6 + V_MUL_DX9_ZERO_F32 = 7 + V_MUL_F32 = 8 + V_MUL_I32_I24 = 9 + V_MUL_HI_I32_I24 = 10 + V_MUL_U32_U24 = 11 + V_MUL_HI_U32_U24 = 12 + V_MIN_NUM_F64 = 13 + V_MAX_NUM_F64 = 14 + V_MIN_I32 = 17 + V_MAX_I32 = 18 + V_MIN_U32 = 19 + V_MAX_U32 = 20 + V_MIN_NUM_F32 = 21 + V_MAX_NUM_F32 = 22 + V_LSHLREV_B32 = 24 + V_LSHRREV_B32 = 25 + V_ASHRREV_I32 = 26 + V_AND_B32 = 27 + V_OR_B32 = 28 + V_XOR_B32 = 29 + V_XNOR_B32 = 30 + V_LSHLREV_B64 = 31 + V_ADD_CO_CI_U32 = 32 + V_SUB_CO_CI_U32 = 33 + V_SUBREV_CO_CI_U32 = 34 + V_ADD_NC_U32 = 37 + V_SUB_NC_U32 = 38 + V_SUBREV_NC_U32 = 39 + V_FMAC_F32 = 43 + V_FMAMK_F32 = 44 + V_FMAAK_F32 = 45 + V_CVT_PK_RTZ_F16_F32 = 47 + V_MIN_NUM_F16 = 48 + V_MAX_NUM_F16 = 49 + V_ADD_F16 = 50 + V_SUB_F16 = 51 + V_SUBREV_F16 = 52 + V_MUL_F16 = 53 + V_FMAC_F16 = 54 + V_FMAMK_F16 = 55 + V_FMAAK_F16 = 56 + V_LDEXP_F16 = 59 + V_PK_FMAC_F16 = 60 + +class VOP3Op(IntEnum): + V_CMP_LT_F16 = 1 + V_CMP_EQ_F16 = 2 + V_CMP_LE_F16 = 3 + V_CMP_GT_F16 = 4 + V_CMP_LG_F16 = 5 + V_CMP_GE_F16 = 6 + V_CMP_O_F16 = 7 + V_CMP_U_F16 = 8 + V_CMP_NGE_F16 = 9 + V_CMP_NLG_F16 = 10 + V_CMP_NGT_F16 = 11 + V_CMP_NLE_F16 = 12 + V_CMP_NEQ_F16 = 13 + V_CMP_NLT_F16 = 14 + V_CMP_LT_F32 = 17 + V_CMP_EQ_F32 = 18 + V_CMP_LE_F32 = 19 + V_CMP_GT_F32 = 20 + V_CMP_LG_F32 = 21 + V_CMP_GE_F32 = 22 + V_CMP_O_F32 = 23 + V_CMP_U_F32 = 24 + V_CMP_NGE_F32 = 25 + V_CMP_NLG_F32 = 26 + V_CMP_NGT_F32 = 27 + V_CMP_NLE_F32 = 28 + V_CMP_NEQ_F32 = 29 + V_CMP_NLT_F32 = 30 + V_CMP_LT_F64 = 33 + V_CMP_EQ_F64 = 34 + V_CMP_LE_F64 = 35 + V_CMP_GT_F64 = 36 + V_CMP_LG_F64 = 37 + V_CMP_GE_F64 = 38 + V_CMP_O_F64 = 39 + V_CMP_U_F64 = 40 + V_CMP_NGE_F64 = 41 + V_CMP_NLG_F64 = 42 + V_CMP_NGT_F64 = 43 + V_CMP_NLE_F64 = 44 + V_CMP_NEQ_F64 = 45 + V_CMP_NLT_F64 = 46 + V_CMP_LT_I16 = 49 + V_CMP_EQ_I16 = 50 + V_CMP_LE_I16 = 51 + V_CMP_GT_I16 = 52 + V_CMP_NE_I16 = 53 + V_CMP_GE_I16 = 54 + V_CMP_LT_U16 = 57 + V_CMP_EQ_U16 = 58 + V_CMP_LE_U16 = 59 + V_CMP_GT_U16 = 60 + V_CMP_NE_U16 = 61 + V_CMP_GE_U16 = 62 + V_CMP_LT_I32 = 65 + V_CMP_EQ_I32 = 66 + V_CMP_LE_I32 = 67 + V_CMP_GT_I32 = 68 + V_CMP_NE_I32 = 69 + V_CMP_GE_I32 = 70 + V_CMP_LT_U32 = 73 + V_CMP_EQ_U32 = 74 + V_CMP_LE_U32 = 75 + V_CMP_GT_U32 = 76 + V_CMP_NE_U32 = 77 + V_CMP_GE_U32 = 78 + V_CMP_LT_I64 = 81 + V_CMP_EQ_I64 = 82 + V_CMP_LE_I64 = 83 + V_CMP_GT_I64 = 84 + V_CMP_NE_I64 = 85 + V_CMP_GE_I64 = 86 + V_CMP_LT_U64 = 89 + V_CMP_EQ_U64 = 90 + V_CMP_LE_U64 = 91 + V_CMP_GT_U64 = 92 + 
V_CMP_NE_U64 = 93 + V_CMP_GE_U64 = 94 + V_CMP_CLASS_F16 = 125 + V_CMP_CLASS_F32 = 126 + V_CMP_CLASS_F64 = 127 + V_CMPX_LT_F16 = 129 + V_CMPX_EQ_F16 = 130 + V_CMPX_LE_F16 = 131 + V_CMPX_GT_F16 = 132 + V_CMPX_LG_F16 = 133 + V_CMPX_GE_F16 = 134 + V_CMPX_O_F16 = 135 + V_CMPX_U_F16 = 136 + V_CMPX_NGE_F16 = 137 + V_CMPX_NLG_F16 = 138 + V_CMPX_NGT_F16 = 139 + V_CMPX_NLE_F16 = 140 + V_CMPX_NEQ_F16 = 141 + V_CMPX_NLT_F16 = 142 + V_CMPX_LT_F32 = 145 + V_CMPX_EQ_F32 = 146 + V_CMPX_LE_F32 = 147 + V_CMPX_GT_F32 = 148 + V_CMPX_LG_F32 = 149 + V_CMPX_GE_F32 = 150 + V_CMPX_O_F32 = 151 + V_CMPX_U_F32 = 152 + V_CMPX_NGE_F32 = 153 + V_CMPX_NLG_F32 = 154 + V_CMPX_NGT_F32 = 155 + V_CMPX_NLE_F32 = 156 + V_CMPX_NEQ_F32 = 157 + V_CMPX_NLT_F32 = 158 + V_CMPX_LT_F64 = 161 + V_CMPX_EQ_F64 = 162 + V_CMPX_LE_F64 = 163 + V_CMPX_GT_F64 = 164 + V_CMPX_LG_F64 = 165 + V_CMPX_GE_F64 = 166 + V_CMPX_O_F64 = 167 + V_CMPX_U_F64 = 168 + V_CMPX_NGE_F64 = 169 + V_CMPX_NLG_F64 = 170 + V_CMPX_NGT_F64 = 171 + V_CMPX_NLE_F64 = 172 + V_CMPX_NEQ_F64 = 173 + V_CMPX_NLT_F64 = 174 + V_CMPX_LT_I16 = 177 + V_CMPX_EQ_I16 = 178 + V_CMPX_LE_I16 = 179 + V_CMPX_GT_I16 = 180 + V_CMPX_NE_I16 = 181 + V_CMPX_GE_I16 = 182 + V_CMPX_LT_U16 = 185 + V_CMPX_EQ_U16 = 186 + V_CMPX_LE_U16 = 187 + V_CMPX_GT_U16 = 188 + V_CMPX_NE_U16 = 189 + V_CMPX_GE_U16 = 190 + V_CMPX_LT_I32 = 193 + V_CMPX_EQ_I32 = 194 + V_CMPX_LE_I32 = 195 + V_CMPX_GT_I32 = 196 + V_CMPX_NE_I32 = 197 + V_CMPX_GE_I32 = 198 + V_CMPX_LT_U32 = 201 + V_CMPX_EQ_U32 = 202 + V_CMPX_LE_U32 = 203 + V_CMPX_GT_U32 = 204 + V_CMPX_NE_U32 = 205 + V_CMPX_GE_U32 = 206 + V_CMPX_LT_I64 = 209 + V_CMPX_EQ_I64 = 210 + V_CMPX_LE_I64 = 211 + V_CMPX_GT_I64 = 212 + V_CMPX_NE_I64 = 213 + V_CMPX_GE_I64 = 214 + V_CMPX_LT_U64 = 217 + V_CMPX_EQ_U64 = 218 + V_CMPX_LE_U64 = 219 + V_CMPX_GT_U64 = 220 + V_CMPX_NE_U64 = 221 + V_CMPX_GE_U64 = 222 + V_CMPX_CLASS_F16 = 253 + V_CMPX_CLASS_F32 = 254 + V_CMPX_CLASS_F64 = 255 + V_CNDMASK_B32 = 257 + V_ADD_F64 = 258 + V_ADD_F32 = 259 + V_SUB_F32 = 260 + V_SUBREV_F32 = 261 + V_MUL_F64 = 262 + V_MUL_DX9_ZERO_F32 = 263 + V_MUL_F32 = 264 + V_MUL_I32_I24 = 265 + V_MUL_HI_I32_I24 = 266 + V_MUL_U32_U24 = 267 + V_MUL_HI_U32_U24 = 268 + V_MIN_NUM_F64 = 269 + V_MAX_NUM_F64 = 270 + V_MIN_I32 = 273 + V_MAX_I32 = 274 + V_MIN_U32 = 275 + V_MAX_U32 = 276 + V_MIN_NUM_F32 = 277 + V_MAX_NUM_F32 = 278 + V_LSHLREV_B32 = 280 + V_LSHRREV_B32 = 281 + V_ASHRREV_I32 = 282 + V_AND_B32 = 283 + V_OR_B32 = 284 + V_XOR_B32 = 285 + V_XNOR_B32 = 286 + V_LSHLREV_B64 = 287 + V_ADD_NC_U32 = 293 + V_SUB_NC_U32 = 294 + V_SUBREV_NC_U32 = 295 + V_FMAC_F32 = 299 + V_CVT_PK_RTZ_F16_F32 = 303 + V_MIN_NUM_F16 = 304 + V_MAX_NUM_F16 = 305 + V_ADD_F16 = 306 + V_SUB_F16 = 307 + V_SUBREV_F16 = 308 + V_MUL_F16 = 309 + V_FMAC_F16 = 310 + V_LDEXP_F16 = 315 + V_NOP = 384 + V_MOV_B32 = 385 + V_READFIRSTLANE_B32 = 386 + V_CVT_I32_F64 = 387 + V_CVT_F64_I32 = 388 + V_CVT_F32_I32 = 389 + V_CVT_F32_U32 = 390 + V_CVT_U32_F32 = 391 + V_CVT_I32_F32 = 392 + V_CVT_F16_F32 = 394 + V_CVT_F32_F16 = 395 + V_CVT_NEAREST_I32_F32 = 396 + V_CVT_FLOOR_I32_F32 = 397 + V_CVT_OFF_F32_I4 = 398 + V_CVT_F32_F64 = 399 + V_CVT_F64_F32 = 400 + V_CVT_F32_UBYTE0 = 401 + V_CVT_F32_UBYTE1 = 402 + V_CVT_F32_UBYTE2 = 403 + V_CVT_F32_UBYTE3 = 404 + V_CVT_U32_F64 = 405 + V_CVT_F64_U32 = 406 + V_TRUNC_F64 = 407 + V_CEIL_F64 = 408 + V_RNDNE_F64 = 409 + V_FLOOR_F64 = 410 + V_PIPEFLUSH = 411 + V_MOV_B16 = 412 + V_FRACT_F32 = 416 + V_TRUNC_F32 = 417 + V_CEIL_F32 = 418 + V_RNDNE_F32 = 419 + V_FLOOR_F32 = 420 + V_EXP_F32 = 421 + V_LOG_F32 = 423 + V_RCP_F32 = 426 + V_RCP_IFLAG_F32 
= 427 + V_RSQ_F32 = 430 + V_RCP_F64 = 431 + V_RSQ_F64 = 433 + V_SQRT_F32 = 435 + V_SQRT_F64 = 436 + V_SIN_F32 = 437 + V_COS_F32 = 438 + V_NOT_B32 = 439 + V_BFREV_B32 = 440 + V_CLZ_I32_U32 = 441 + V_CTZ_I32_B32 = 442 + V_CLS_I32 = 443 + V_FREXP_EXP_I32_F64 = 444 + V_FREXP_MANT_F64 = 445 + V_FRACT_F64 = 446 + V_FREXP_EXP_I32_F32 = 447 + V_FREXP_MANT_F32 = 448 + V_MOVRELD_B32 = 450 + V_MOVRELS_B32 = 451 + V_MOVRELSD_B32 = 452 + V_MOVRELSD_2_B32 = 456 + V_CVT_F16_U16 = 464 + V_CVT_F16_I16 = 465 + V_CVT_U16_F16 = 466 + V_CVT_I16_F16 = 467 + V_RCP_F16 = 468 + V_SQRT_F16 = 469 + V_RSQ_F16 = 470 + V_LOG_F16 = 471 + V_EXP_F16 = 472 + V_FREXP_MANT_F16 = 473 + V_FREXP_EXP_I16_F16 = 474 + V_FLOOR_F16 = 475 + V_CEIL_F16 = 476 + V_TRUNC_F16 = 477 + V_RNDNE_F16 = 478 + V_FRACT_F16 = 479 + V_SIN_F16 = 480 + V_COS_F16 = 481 + V_SAT_PK_U8_I16 = 482 + V_CVT_NORM_I16_F16 = 483 + V_CVT_NORM_U16_F16 = 484 + V_NOT_B16 = 489 + V_CVT_I32_I16 = 490 + V_CVT_U32_U16 = 491 + V_CVT_F32_FP8 = 492 + V_CVT_F32_BF8 = 493 + V_CVT_PK_F32_FP8 = 494 + V_CVT_PK_F32_BF8 = 495 + V_FMA_DX9_ZERO_F32 = 521 + V_MAD_I32_I24 = 522 + V_MAD_U32_U24 = 523 + V_CUBEID_F32 = 524 + V_CUBESC_F32 = 525 + V_CUBETC_F32 = 526 + V_CUBEMA_F32 = 527 + V_BFE_U32 = 528 + V_BFE_I32 = 529 + V_BFI_B32 = 530 + V_FMA_F32 = 531 + V_FMA_F64 = 532 + V_LERP_U8 = 533 + V_ALIGNBIT_B32 = 534 + V_ALIGNBYTE_B32 = 535 + V_MULLIT_F32 = 536 + V_MIN3_I32 = 538 + V_MIN3_U32 = 539 + V_MAX3_I32 = 541 + V_MAX3_U32 = 542 + V_MED3_I32 = 544 + V_MED3_U32 = 545 + V_SAD_U8 = 546 + V_SAD_HI_U8 = 547 + V_SAD_U16 = 548 + V_SAD_U32 = 549 + V_CVT_PK_U8_F32 = 550 + V_DIV_FIXUP_F32 = 551 + V_DIV_FIXUP_F64 = 552 + V_MIN3_NUM_F32 = 553 + V_MAX3_NUM_F32 = 554 + V_MIN3_NUM_F16 = 555 + V_MAX3_NUM_F16 = 556 + V_MINIMUM3_F32 = 557 + V_MAXIMUM3_F32 = 558 + V_MINIMUM3_F16 = 559 + V_MAXIMUM3_F16 = 560 + V_MED3_NUM_F32 = 561 + V_MED3_NUM_F16 = 562 + V_DIV_FMAS_F32 = 567 + V_DIV_FMAS_F64 = 568 + V_MSAD_U8 = 569 + V_QSAD_PK_U16_U8 = 570 + V_MQSAD_PK_U16_U8 = 571 + V_MQSAD_U32_U8 = 573 + V_XOR3_B32 = 576 + V_MAD_U16 = 577 + V_PERM_B32 = 580 + V_XAD_U32 = 581 + V_LSHL_ADD_U32 = 582 + V_ADD_LSHL_U32 = 583 + V_FMA_F16 = 584 + V_MIN3_I16 = 586 + V_MIN3_U16 = 587 + V_MAX3_I16 = 589 + V_MAX3_U16 = 590 + V_MED3_I16 = 592 + V_MED3_U16 = 593 + V_MAD_I16 = 595 + V_DIV_FIXUP_F16 = 596 + V_ADD3_U32 = 597 + V_LSHL_OR_B32 = 598 + V_AND_OR_B32 = 599 + V_OR3_B32 = 600 + V_MAD_U32_U16 = 601 + V_MAD_I32_I16 = 602 + V_PERMLANE16_B32 = 603 + V_PERMLANEX16_B32 = 604 + V_CNDMASK_B16 = 605 + V_MAXMIN_U32 = 610 + V_MINMAX_U32 = 611 + V_MAXMIN_I32 = 612 + V_MINMAX_I32 = 613 + V_DOT2_F16_F16 = 614 + V_DOT2_BF16_BF16 = 615 + V_MINMAX_NUM_F32 = 616 + V_MAXMIN_NUM_F32 = 617 + V_MINMAX_NUM_F16 = 618 + V_MAXMIN_NUM_F16 = 619 + V_MINIMUMMAXIMUM_F32 = 620 + V_MAXIMUMMINIMUM_F32 = 621 + V_MINIMUMMAXIMUM_F16 = 622 + V_MAXIMUMMINIMUM_F16 = 623 + V_S_EXP_F32 = 640 + V_S_EXP_F16 = 641 + V_S_LOG_F32 = 642 + V_S_LOG_F16 = 643 + V_S_RCP_F32 = 644 + V_S_RCP_F16 = 645 + V_S_RSQ_F32 = 646 + V_S_RSQ_F16 = 647 + V_S_SQRT_F32 = 648 + V_S_SQRT_F16 = 649 + V_ADD_NC_U16 = 771 + V_SUB_NC_U16 = 772 + V_MUL_LO_U16 = 773 + V_CVT_PK_I16_F32 = 774 + V_CVT_PK_U16_F32 = 775 + V_MAX_U16 = 777 + V_MAX_I16 = 778 + V_MIN_U16 = 779 + V_MIN_I16 = 780 + V_ADD_NC_I16 = 781 + V_SUB_NC_I16 = 782 + V_PERMLANE16_VAR_B32 = 783 + V_PERMLANEX16_VAR_B32 = 784 + V_PACK_B32_F16 = 785 + V_CVT_PK_NORM_I16_F16 = 786 + V_CVT_PK_NORM_U16_F16 = 787 + V_LDEXP_F32 = 796 + V_BFM_B32 = 797 + V_BCNT_U32_B32 = 798 + V_MBCNT_LO_U32_B32 = 799 + V_MBCNT_HI_U32_B32 = 800 + 
V_CVT_PK_NORM_I16_F32 = 801 + V_CVT_PK_NORM_U16_F32 = 802 + V_CVT_PK_U16_U32 = 803 + V_CVT_PK_I16_I32 = 804 + V_SUB_NC_I32 = 805 + V_ADD_NC_I32 = 806 + V_LDEXP_F64 = 811 + V_MUL_LO_U32 = 812 + V_MUL_HI_U32 = 813 + V_MUL_HI_I32 = 814 + V_TRIG_PREOP_F64 = 815 + V_LSHLREV_B16 = 824 + V_LSHRREV_B16 = 825 + V_ASHRREV_I16 = 826 + V_LSHRREV_B64 = 829 + V_ASHRREV_I64 = 830 + V_MINIMUM_F64 = 833 + V_MAXIMUM_F64 = 834 + V_READLANE_B32 = 864 + V_WRITELANE_B32 = 865 + V_AND_B16 = 866 + V_OR_B16 = 867 + V_XOR_B16 = 868 + V_MINIMUM_F32 = 869 + V_MAXIMUM_F32 = 870 + V_MINIMUM_F16 = 871 + V_MAXIMUM_F16 = 872 + V_CVT_PK_FP8_F32 = 873 + V_CVT_PK_BF8_F32 = 874 + V_CVT_SR_FP8_F32 = 875 + V_CVT_SR_BF8_F32 = 876 + +class VOP3POp(IntEnum): + V_PK_MAD_I16 = 0 + V_PK_MUL_LO_U16 = 1 + V_PK_ADD_I16 = 2 + V_PK_SUB_I16 = 3 + V_PK_LSHLREV_B16 = 4 + V_PK_LSHRREV_B16 = 5 + V_PK_ASHRREV_I16 = 6 + V_PK_MAX_I16 = 7 + V_PK_MIN_I16 = 8 + V_PK_MAD_U16 = 9 + V_PK_ADD_U16 = 10 + V_PK_SUB_U16 = 11 + V_PK_MAX_U16 = 12 + V_PK_MIN_U16 = 13 + V_PK_FMA_F16 = 14 + V_PK_ADD_F16 = 15 + V_PK_MUL_F16 = 16 + V_DOT2_F32_F16 = 19 + V_DOT4_I32_IU8 = 22 + V_DOT4_U32_U8 = 23 + V_DOT8_I32_IU4 = 24 + V_DOT8_U32_U4 = 25 + V_DOT2_F32_BF16 = 26 + V_PK_MIN_NUM_F16 = 27 + V_PK_MAX_NUM_F16 = 28 + V_PK_MINIMUM_F16 = 29 + V_PK_MAXIMUM_F16 = 30 + V_FMA_MIX_F32 = 32 + V_FMA_MIXLO_F16 = 33 + V_FMA_MIXHI_F16 = 34 + V_DOT4_F32_FP8_BF8 = 36 + V_DOT4_F32_BF8_FP8 = 37 + V_DOT4_F32_FP8_FP8 = 38 + V_DOT4_F32_BF8_BF8 = 39 + V_WMMA_F32_16X16X16_F16 = 64 + V_WMMA_F32_16X16X16_BF16 = 65 + V_WMMA_F16_16X16X16_F16 = 66 + V_WMMA_BF16_16X16X16_BF16 = 67 + V_WMMA_I32_16X16X16_IU8 = 68 + V_WMMA_I32_16X16X16_IU4 = 69 + V_WMMA_F32_16X16X16_FP8_FP8 = 70 + V_WMMA_F32_16X16X16_FP8_BF8 = 71 + V_WMMA_F32_16X16X16_BF8_FP8 = 72 + V_WMMA_F32_16X16X16_BF8_BF8 = 73 + V_WMMA_I32_16X16X32_IU4 = 74 + V_SWMMAC_F32_16X16X32_F16 = 80 + V_SWMMAC_F32_16X16X32_BF16 = 81 + V_SWMMAC_F16_16X16X32_F16 = 82 + V_SWMMAC_BF16_16X16X32_BF16 = 83 + V_SWMMAC_I32_16X16X32_IU8 = 84 + V_SWMMAC_I32_16X16X32_IU4 = 85 + V_SWMMAC_I32_16X16X64_IU4 = 86 + V_SWMMAC_F32_16X16X32_FP8_FP8 = 87 + V_SWMMAC_F32_16X16X32_FP8_BF8 = 88 + V_SWMMAC_F32_16X16X32_BF8_FP8 = 89 + V_SWMMAC_F32_16X16X32_BF8_BF8 = 90 + +class VOP3SDOp(IntEnum): + DWORD = 1 + V_ADD_CO_CI_U32 = 288 + V_SUB_CO_CI_U32 = 289 + V_SUBREV_CO_CI_U32 = 290 + V_DIV_SCALE_F32 = 764 + V_DIV_SCALE_F64 = 765 + V_MAD_CO_U64_U32 = 766 + V_MAD_CO_I64_I32 = 767 + V_ADD_CO_U32 = 768 + V_SUB_CO_U32 = 769 + V_SUBREV_CO_U32 = 770 + +class VOPCOp(IntEnum): + V_CMP_LT_F16 = 1 + V_CMP_EQ_F16 = 2 + V_CMP_LE_F16 = 3 + V_CMP_GT_F16 = 4 + V_CMP_LG_F16 = 5 + V_CMP_GE_F16 = 6 + V_CMP_O_F16 = 7 + V_CMP_U_F16 = 8 + V_CMP_NGE_F16 = 9 + V_CMP_NLG_F16 = 10 + V_CMP_NGT_F16 = 11 + V_CMP_NLE_F16 = 12 + V_CMP_NEQ_F16 = 13 + V_CMP_NLT_F16 = 14 + V_CMP_LT_F32 = 17 + V_CMP_EQ_F32 = 18 + V_CMP_LE_F32 = 19 + V_CMP_GT_F32 = 20 + V_CMP_LG_F32 = 21 + V_CMP_GE_F32 = 22 + V_CMP_O_F32 = 23 + V_CMP_U_F32 = 24 + V_CMP_NGE_F32 = 25 + V_CMP_NLG_F32 = 26 + V_CMP_NGT_F32 = 27 + V_CMP_NLE_F32 = 28 + V_CMP_NEQ_F32 = 29 + V_CMP_NLT_F32 = 30 + V_CMP_LT_F64 = 33 + V_CMP_EQ_F64 = 34 + V_CMP_LE_F64 = 35 + V_CMP_GT_F64 = 36 + V_CMP_LG_F64 = 37 + V_CMP_GE_F64 = 38 + V_CMP_O_F64 = 39 + V_CMP_U_F64 = 40 + V_CMP_NGE_F64 = 41 + V_CMP_NLG_F64 = 42 + V_CMP_NGT_F64 = 43 + V_CMP_NLE_F64 = 44 + V_CMP_NEQ_F64 = 45 + V_CMP_NLT_F64 = 46 + V_CMP_LT_I16 = 49 + V_CMP_EQ_I16 = 50 + V_CMP_LE_I16 = 51 + V_CMP_GT_I16 = 52 + V_CMP_NE_I16 = 53 + V_CMP_GE_I16 = 54 + V_CMP_LT_U16 = 57 + V_CMP_EQ_U16 = 58 + V_CMP_LE_U16 = 59 + V_CMP_GT_U16 
= 60 + V_CMP_NE_U16 = 61 + V_CMP_GE_U16 = 62 + V_CMP_LT_I32 = 65 + V_CMP_EQ_I32 = 66 + V_CMP_LE_I32 = 67 + V_CMP_GT_I32 = 68 + V_CMP_NE_I32 = 69 + V_CMP_GE_I32 = 70 + V_CMP_LT_U32 = 73 + V_CMP_EQ_U32 = 74 + V_CMP_LE_U32 = 75 + V_CMP_GT_U32 = 76 + V_CMP_NE_U32 = 77 + V_CMP_GE_U32 = 78 + V_CMP_LT_I64 = 81 + V_CMP_EQ_I64 = 82 + V_CMP_LE_I64 = 83 + V_CMP_GT_I64 = 84 + V_CMP_NE_I64 = 85 + V_CMP_GE_I64 = 86 + V_CMP_LT_U64 = 89 + V_CMP_EQ_U64 = 90 + V_CMP_LE_U64 = 91 + V_CMP_GT_U64 = 92 + V_CMP_NE_U64 = 93 + V_CMP_GE_U64 = 94 + V_CMP_CLASS_F16 = 125 + V_CMP_CLASS_F32 = 126 + V_CMP_CLASS_F64 = 127 + V_CMPX_LT_F16 = 129 + V_CMPX_EQ_F16 = 130 + V_CMPX_LE_F16 = 131 + V_CMPX_GT_F16 = 132 + V_CMPX_LG_F16 = 133 + V_CMPX_GE_F16 = 134 + V_CMPX_O_F16 = 135 + V_CMPX_U_F16 = 136 + V_CMPX_NGE_F16 = 137 + V_CMPX_NLG_F16 = 138 + V_CMPX_NGT_F16 = 139 + V_CMPX_NLE_F16 = 140 + V_CMPX_NEQ_F16 = 141 + V_CMPX_NLT_F16 = 142 + V_CMPX_LT_F32 = 145 + V_CMPX_EQ_F32 = 146 + V_CMPX_LE_F32 = 147 + V_CMPX_GT_F32 = 148 + V_CMPX_LG_F32 = 149 + V_CMPX_GE_F32 = 150 + V_CMPX_O_F32 = 151 + V_CMPX_U_F32 = 152 + V_CMPX_NGE_F32 = 153 + V_CMPX_NLG_F32 = 154 + V_CMPX_NGT_F32 = 155 + V_CMPX_NLE_F32 = 156 + V_CMPX_NEQ_F32 = 157 + V_CMPX_NLT_F32 = 158 + V_CMPX_LT_F64 = 161 + V_CMPX_EQ_F64 = 162 + V_CMPX_LE_F64 = 163 + V_CMPX_GT_F64 = 164 + V_CMPX_LG_F64 = 165 + V_CMPX_GE_F64 = 166 + V_CMPX_O_F64 = 167 + V_CMPX_U_F64 = 168 + V_CMPX_NGE_F64 = 169 + V_CMPX_NLG_F64 = 170 + V_CMPX_NGT_F64 = 171 + V_CMPX_NLE_F64 = 172 + V_CMPX_NEQ_F64 = 173 + V_CMPX_NLT_F64 = 174 + V_CMPX_LT_I16 = 177 + V_CMPX_EQ_I16 = 178 + V_CMPX_LE_I16 = 179 + V_CMPX_GT_I16 = 180 + V_CMPX_NE_I16 = 181 + V_CMPX_GE_I16 = 182 + V_CMPX_LT_U16 = 185 + V_CMPX_EQ_U16 = 186 + V_CMPX_LE_U16 = 187 + V_CMPX_GT_U16 = 188 + V_CMPX_NE_U16 = 189 + V_CMPX_GE_U16 = 190 + V_CMPX_LT_I32 = 193 + V_CMPX_EQ_I32 = 194 + V_CMPX_LE_I32 = 195 + V_CMPX_GT_I32 = 196 + V_CMPX_NE_I32 = 197 + V_CMPX_GE_I32 = 198 + V_CMPX_LT_U32 = 201 + V_CMPX_EQ_U32 = 202 + V_CMPX_LE_U32 = 203 + V_CMPX_GT_U32 = 204 + V_CMPX_NE_U32 = 205 + V_CMPX_GE_U32 = 206 + V_CMPX_LT_I64 = 209 + V_CMPX_EQ_I64 = 210 + V_CMPX_LE_I64 = 211 + V_CMPX_GT_I64 = 212 + V_CMPX_NE_I64 = 213 + V_CMPX_GE_I64 = 214 + V_CMPX_LT_U64 = 217 + V_CMPX_EQ_U64 = 218 + V_CMPX_LE_U64 = 219 + V_CMPX_GT_U64 = 220 + V_CMPX_NE_U64 = 221 + V_CMPX_GE_U64 = 222 + V_CMPX_CLASS_F16 = 253 + V_CMPX_CLASS_F32 = 254 + V_CMPX_CLASS_F64 = 255 + +class VOPDOp(IntEnum): + V_DUAL_FMAC_F32 = 0 + V_DUAL_FMAAK_F32 = 1 + V_DUAL_FMAMK_F32 = 2 + V_DUAL_MUL_F32 = 3 + V_DUAL_ADD_F32 = 4 + V_DUAL_SUB_F32 = 5 + V_DUAL_SUBREV_F32 = 6 + V_DUAL_MUL_DX9_ZERO_F32 = 7 + V_DUAL_MOV_B32 = 8 + V_DUAL_CNDMASK_B32 = 9 + V_DUAL_MAX_NUM_F32 = 10 + V_DUAL_MIN_NUM_F32 = 11 + V_DUAL_DOT2ACC_F32_F16 = 12 + V_DUAL_DOT2ACC_F32_BF16 = 13 + V_DUAL_ADD_NC_U32 = 16 + V_DUAL_LSHLREV_B32 = 17 + V_DUAL_AND_B32 = 18 + +class VSAMPLEOp(IntEnum): + IMAGE_MSAA_LOAD = 24 + IMAGE_SAMPLE = 27 + IMAGE_SAMPLE_D = 28 + IMAGE_SAMPLE_L = 29 + IMAGE_SAMPLE_B = 30 + IMAGE_SAMPLE_LZ = 31 + IMAGE_SAMPLE_C = 32 + IMAGE_SAMPLE_C_D = 33 + IMAGE_SAMPLE_C_L = 34 + IMAGE_SAMPLE_C_B = 35 + IMAGE_SAMPLE_C_LZ = 36 + IMAGE_SAMPLE_O = 37 + IMAGE_SAMPLE_D_O = 38 + IMAGE_SAMPLE_L_O = 39 + IMAGE_SAMPLE_B_O = 40 + IMAGE_SAMPLE_LZ_O = 41 + IMAGE_SAMPLE_C_O = 42 + IMAGE_SAMPLE_C_D_O = 43 + IMAGE_SAMPLE_C_L_O = 44 + IMAGE_SAMPLE_C_B_O = 45 + IMAGE_SAMPLE_C_LZ_O = 46 + IMAGE_GATHER4 = 47 + IMAGE_GATHER4_L = 48 + IMAGE_GATHER4_B = 49 + IMAGE_GATHER4_LZ = 50 + IMAGE_GATHER4_C = 51 + IMAGE_GATHER4_C_LZ = 52 + IMAGE_GATHER4_O = 53 + IMAGE_GATHER4_LZ_O 
= 54 + IMAGE_GATHER4_C_LZ_O = 55 + IMAGE_GET_LOD = 56 + IMAGE_SAMPLE_D_G16 = 57 + IMAGE_SAMPLE_C_D_G16 = 58 + IMAGE_SAMPLE_D_O_G16 = 59 + IMAGE_SAMPLE_C_D_O_G16 = 60 + IMAGE_SAMPLE_CL = 64 + IMAGE_SAMPLE_D_CL = 65 + IMAGE_SAMPLE_B_CL = 66 + IMAGE_SAMPLE_C_CL = 67 + IMAGE_SAMPLE_C_D_CL = 68 + IMAGE_SAMPLE_C_B_CL = 69 + IMAGE_SAMPLE_CL_O = 70 + IMAGE_SAMPLE_D_CL_O = 71 + IMAGE_SAMPLE_B_CL_O = 72 + IMAGE_SAMPLE_C_CL_O = 73 + IMAGE_SAMPLE_C_D_CL_O = 74 + IMAGE_SAMPLE_C_B_CL_O = 75 + IMAGE_SAMPLE_C_D_CL_G16 = 84 + IMAGE_SAMPLE_D_CL_O_G16 = 85 + IMAGE_SAMPLE_C_D_CL_O_G16 = 86 + IMAGE_SAMPLE_D_CL_G16 = 95 + IMAGE_GATHER4_CL = 96 + IMAGE_GATHER4_B_CL = 97 + IMAGE_GATHER4_C_CL = 98 + IMAGE_GATHER4_C_L = 99 + IMAGE_GATHER4_C_B = 100 + IMAGE_GATHER4_C_B_CL = 101 + IMAGE_GATHER4H = 144 + +class VSCRATCHOp(IntEnum): + SCRATCH_LOAD_U8 = 16 + SCRATCH_LOAD_I8 = 17 + SCRATCH_LOAD_U16 = 18 + SCRATCH_LOAD_I16 = 19 + SCRATCH_LOAD_B32 = 20 + SCRATCH_LOAD_B64 = 21 + SCRATCH_LOAD_B96 = 22 + SCRATCH_LOAD_B128 = 23 + SCRATCH_STORE_B8 = 24 + SCRATCH_STORE_B16 = 25 + SCRATCH_STORE_B32 = 26 + SCRATCH_STORE_B64 = 27 + SCRATCH_STORE_B96 = 28 + SCRATCH_STORE_B128 = 29 + SCRATCH_LOAD_D16_U8 = 30 + SCRATCH_LOAD_D16_I8 = 31 + SCRATCH_LOAD_D16_B16 = 32 + SCRATCH_LOAD_D16_HI_U8 = 33 + SCRATCH_LOAD_D16_HI_I8 = 34 + SCRATCH_LOAD_D16_HI_B16 = 35 + SCRATCH_STORE_D16_HI_B8 = 36 + SCRATCH_STORE_D16_HI_B16 = 37 + SCRATCH_LOAD_BLOCK = 83 + SCRATCH_STORE_BLOCK = 84 + +# instruction formats +class DPP16(Inst64): + src0:Src = bits[39:32] + dpp_ctrl = bits[48:40] + fi = bits[50] + bc = bits[51] + src0_neg = bits[52] + src0_abs = bits[53] + src1_neg = bits[54] + src1_abs = bits[55] + bank_mask = bits[59:56] + row_mask = bits[63:60] + +class DPP8(Inst64): + src0:Src = bits[39:32] + lane_sel0 = bits[42:40] + lane_sel1 = bits[45:43] + lane_sel2 = bits[48:46] + lane_sel3 = bits[51:49] + lane_sel4 = bits[54:52] + lane_sel5 = bits[57:55] + lane_sel6 = bits[60:58] + lane_sel7 = bits[63:61] + +class SMEM(Inst64): + encoding = bits[31:26] == 0b111101 + op:Annotated[BitField, SMEMOp] = bits[18:13] + sdata:SGPRField = bits[12:6] + sbase:SGPRField = bits[5:0] + soffset:SSrc = bits[63:57] + scope = bits[22:21] + th = bits[24:23] + ioffset = bits[55:32] + +class SOP1(Inst32): + encoding = bits[31:23] == 0b101111101 + op:Annotated[BitField, SOP1Op] = bits[15:8] + sdst:SGPRField = bits[22:16] + ssrc0:SSrc = bits[7:0] + +class SOP2(Inst32): + encoding = bits[31:30] == 0b10 + op:Annotated[BitField, SOP2Op] = bits[29:23] + sdst:SGPRField = bits[22:16] + ssrc0:SSrc = bits[7:0] + ssrc1:SSrc = bits[15:8] + +class SOPC(Inst32): + encoding = bits[31:23] == 0b101111110 + op:Annotated[BitField, SOPCOp] = bits[22:16] + ssrc0:SSrc = bits[7:0] + ssrc1:SSrc = bits[15:8] + +class SOPK(Inst32): + encoding = bits[31:28] == 0b1011 + op:Annotated[BitField, SOPKOp] = bits[27:23] + sdst:SGPRField = bits[22:16] + simm16:SImm = bits[15:0] + +class SOPP(Inst32): + encoding = bits[31:23] == 0b101111111 + op:Annotated[BitField, SOPPOp] = bits[22:16] + simm16:SImm = bits[15:0] + +class VBUFFER(Inst64): + encoding = bits[31:26] == 0b110001 + soffset:SSrc = bits[6:0] + op:Annotated[BitField, VBUFFEROp] = bits[21:14] + tfe = bits[22] + vdata:VGPRField = bits[39:32] + rsrc = bits[49:41] + scope = bits[51:50] + th = bits[54:52] + format = bits[61:55] + offen = bits[62] + idxen = bits[63] + vaddr:VGPRField = bits[71:64] + ioffset = bits[95:72] + +class VDS(Inst64): + encoding = bits[31:26] == 0b110110 + offset0 = bits[7:0] + offset1 = bits[15:8] + op = bits[25:18] + addr:VGPRField = 
bits[39:32] + data0:VGPRField = bits[47:40] + data1:VGPRField = bits[55:48] + vdst:VGPRField = bits[63:56] + +class VDSDIR(Inst64): + encoding = bits[31:24] == 0b11001101 + vdst:VGPRField = bits[7:0] + waitexp = bits[10:8] + opsel = bits[14:11] + cm = bits[15] + op:Annotated[BitField, VDSDIROp] = bits[20:16] + src0:Src = bits[40:32] + src1:Src = bits[49:41] + src2:Src = bits[58:50] + neg = bits[63:61] + +class VEXPORT(Inst64): + encoding = bits[31:26] == 0b111110 + en = bits[3:0] + target = bits[9:4] + done = bits[11] + row = bits[13] + vsrc0 = bits[39:32] + vsrc1:VGPRField = bits[47:40] + vsrc2 = bits[55:48] + vsrc3 = bits[63:56] + +class VINTERP(Inst64): + encoding = bits[31:24] == 0b11001101 + op:Annotated[BitField, VINTERPOp] = bits[20:16] + vdst:VGPRField = bits[7:0] + src0:Src = bits[40:32] + src1:Src = bits[49:41] + src2:Src = bits[58:50] + waitexp = bits[10:8] + opsel = bits[14:11] + neg = bits[63:61] + cm = bits[15] + +class VOP1(Inst32): + encoding = bits[31:25] == 0b111111 + op:Annotated[BitField, VOP1Op] = bits[15:9] + vdst:VGPRField = bits[24:17] + src0:Src = bits[8:0] + +class VOP2(Inst32): + encoding = bits[31] == 0 + op:Annotated[BitField, VOP2Op] = bits[30:25] + vdst:VGPRField = bits[24:17] + src0:Src = bits[8:0] + vsrc1:VGPRField = bits[16:9] + +class VOP3(Inst64): + encoding = bits[31:26] == 0b110101 + op:Annotated[BitField, VOP3Op] = bits[25:16] + vdst:VGPRField = bits[7:0] + src0:Src = bits[40:32] + src1:Src = bits[49:41] + src2:Src = bits[58:50] + omod = bits[60:59] + neg = bits[63:61] + abs = bits[10:8] + opsel = bits[14:11] + cm = bits[15] + +class VOP3P(Inst64): + encoding = bits[31:24] == 0b11001100 + _defaults = {'opsel_hi': 3, 'opsel_hi2': 1} + op:Annotated[BitField, VOP3POp] = bits[22:16] + vdst:VGPRField = bits[7:0] + src0:Src = bits[40:32] + src1:Src = bits[49:41] + src2:Src = bits[58:50] + neg = bits[63:61] + neg_hi = bits[10:8] + opsel = bits[13:11] + opsel_hi = bits[60:59] + opsel_hi2 = bits[14] + cm = bits[15] + +class VOP3SD(Inst64): + encoding = bits[31:26] == 0b110101 + op:Annotated[BitField, VOP3SDOp] = bits[25:16] + vdst:VGPRField = bits[7:0] + sdst:SGPRField = bits[14:8] + src0:Src = bits[40:32] + src1:Src = bits[49:41] + src2:Src = bits[58:50] + cm = bits[15] + omod = bits[60:59] + neg = bits[63:61] + +class VOPC(Inst32): + encoding = bits[31:25] == 0b111110 + op:Annotated[BitField, VOPCOp] = bits[24:17] + src0:Src = bits[8:0] + vsrc1:VGPRField = bits[16:9] + +class VOPD(Inst64): + encoding = bits[31:26] == 0b110010 + opx:Annotated[BitField, VOPDOp] = bits[25:22] + opy:Annotated[BitField, VOPDOp] = bits[21:17] + vdstx:VGPRField = bits[63:56] + vdsty:VDSTYEnc = bits[55:49] + srcx0:Src = bits[8:0] + vsrcx1:VGPRField = bits[16:9] + srcy0:Src = bits[40:32] + vsrcy1:VGPRField = bits[48:41] + +# instruction helpers +s_load_b32 = functools.partial(SMEM, SMEMOp.S_LOAD_B32) +s_load_b64 = functools.partial(SMEM, SMEMOp.S_LOAD_B64) +s_load_b128 = functools.partial(SMEM, SMEMOp.S_LOAD_B128) +s_load_b256 = functools.partial(SMEM, SMEMOp.S_LOAD_B256) +s_load_b512 = functools.partial(SMEM, SMEMOp.S_LOAD_B512) +s_load_b96 = functools.partial(SMEM, SMEMOp.S_LOAD_B96) +s_load_i8 = functools.partial(SMEM, SMEMOp.S_LOAD_I8) +s_load_u8 = functools.partial(SMEM, SMEMOp.S_LOAD_U8) +s_load_i16 = functools.partial(SMEM, SMEMOp.S_LOAD_I16) +s_load_u16 = functools.partial(SMEM, SMEMOp.S_LOAD_U16) +s_buffer_load_b32 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_B32) +s_buffer_load_b64 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_B64) +s_buffer_load_b128 = 
functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_B128) +s_buffer_load_b256 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_B256) +s_buffer_load_b512 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_B512) +s_buffer_load_b96 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_B96) +s_buffer_load_i8 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_I8) +s_buffer_load_u8 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_U8) +s_buffer_load_i16 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_I16) +s_buffer_load_u16 = functools.partial(SMEM, SMEMOp.S_BUFFER_LOAD_U16) +s_dcache_inv = functools.partial(SMEM, SMEMOp.S_DCACHE_INV) +s_prefetch_inst = functools.partial(SMEM, SMEMOp.S_PREFETCH_INST) +s_prefetch_inst_pc_rel = functools.partial(SMEM, SMEMOp.S_PREFETCH_INST_PC_REL) +s_prefetch_data = functools.partial(SMEM, SMEMOp.S_PREFETCH_DATA) +s_buffer_prefetch_data = functools.partial(SMEM, SMEMOp.S_BUFFER_PREFETCH_DATA) +s_prefetch_data_pc_rel = functools.partial(SMEM, SMEMOp.S_PREFETCH_DATA_PC_REL) +s_mov_b32 = functools.partial(SOP1, SOP1Op.S_MOV_B32) +s_mov_b64 = functools.partial(SOP1, SOP1Op.S_MOV_B64) +s_cmov_b32 = functools.partial(SOP1, SOP1Op.S_CMOV_B32) +s_cmov_b64 = functools.partial(SOP1, SOP1Op.S_CMOV_B64) +s_brev_b32 = functools.partial(SOP1, SOP1Op.S_BREV_B32) +s_brev_b64 = functools.partial(SOP1, SOP1Op.S_BREV_B64) +s_ctz_i32_b32 = functools.partial(SOP1, SOP1Op.S_CTZ_I32_B32) +s_ctz_i32_b64 = functools.partial(SOP1, SOP1Op.S_CTZ_I32_B64) +s_clz_i32_u32 = functools.partial(SOP1, SOP1Op.S_CLZ_I32_U32) +s_clz_i32_u64 = functools.partial(SOP1, SOP1Op.S_CLZ_I32_U64) +s_cls_i32 = functools.partial(SOP1, SOP1Op.S_CLS_I32) +s_cls_i32_i64 = functools.partial(SOP1, SOP1Op.S_CLS_I32_I64) +s_sext_i32_i8 = functools.partial(SOP1, SOP1Op.S_SEXT_I32_I8) +s_sext_i32_i16 = functools.partial(SOP1, SOP1Op.S_SEXT_I32_I16) +s_bitset0_b32 = functools.partial(SOP1, SOP1Op.S_BITSET0_B32) +s_bitset0_b64 = functools.partial(SOP1, SOP1Op.S_BITSET0_B64) +s_bitset1_b32 = functools.partial(SOP1, SOP1Op.S_BITSET1_B32) +s_bitset1_b64 = functools.partial(SOP1, SOP1Op.S_BITSET1_B64) +s_bitreplicate_b64_b32 = functools.partial(SOP1, SOP1Op.S_BITREPLICATE_B64_B32) +s_abs_i32 = functools.partial(SOP1, SOP1Op.S_ABS_I32) +s_bcnt0_i32_b32 = functools.partial(SOP1, SOP1Op.S_BCNT0_I32_B32) +s_bcnt0_i32_b64 = functools.partial(SOP1, SOP1Op.S_BCNT0_I32_B64) +s_bcnt1_i32_b32 = functools.partial(SOP1, SOP1Op.S_BCNT1_I32_B32) +s_bcnt1_i32_b64 = functools.partial(SOP1, SOP1Op.S_BCNT1_I32_B64) +s_quadmask_b32 = functools.partial(SOP1, SOP1Op.S_QUADMASK_B32) +s_quadmask_b64 = functools.partial(SOP1, SOP1Op.S_QUADMASK_B64) +s_wqm_b32 = functools.partial(SOP1, SOP1Op.S_WQM_B32) +s_wqm_b64 = functools.partial(SOP1, SOP1Op.S_WQM_B64) +s_not_b32 = functools.partial(SOP1, SOP1Op.S_NOT_B32) +s_not_b64 = functools.partial(SOP1, SOP1Op.S_NOT_B64) +s_and_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_AND_SAVEEXEC_B32) +s_and_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_AND_SAVEEXEC_B64) +s_or_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_OR_SAVEEXEC_B32) +s_or_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_OR_SAVEEXEC_B64) +s_xor_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_XOR_SAVEEXEC_B32) +s_xor_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_XOR_SAVEEXEC_B64) +s_nand_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_NAND_SAVEEXEC_B32) +s_nand_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_NAND_SAVEEXEC_B64) +s_nor_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_NOR_SAVEEXEC_B32) +s_nor_saveexec_b64 = functools.partial(SOP1, 
SOP1Op.S_NOR_SAVEEXEC_B64) +s_xnor_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_XNOR_SAVEEXEC_B32) +s_xnor_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_XNOR_SAVEEXEC_B64) +s_and_not0_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_AND_NOT0_SAVEEXEC_B32) +s_and_not0_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_AND_NOT0_SAVEEXEC_B64) +s_or_not0_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_OR_NOT0_SAVEEXEC_B32) +s_or_not0_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_OR_NOT0_SAVEEXEC_B64) +s_and_not1_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_AND_NOT1_SAVEEXEC_B32) +s_and_not1_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_AND_NOT1_SAVEEXEC_B64) +s_or_not1_saveexec_b32 = functools.partial(SOP1, SOP1Op.S_OR_NOT1_SAVEEXEC_B32) +s_or_not1_saveexec_b64 = functools.partial(SOP1, SOP1Op.S_OR_NOT1_SAVEEXEC_B64) +s_and_not0_wrexec_b32 = functools.partial(SOP1, SOP1Op.S_AND_NOT0_WREXEC_B32) +s_and_not0_wrexec_b64 = functools.partial(SOP1, SOP1Op.S_AND_NOT0_WREXEC_B64) +s_and_not1_wrexec_b32 = functools.partial(SOP1, SOP1Op.S_AND_NOT1_WREXEC_B32) +s_and_not1_wrexec_b64 = functools.partial(SOP1, SOP1Op.S_AND_NOT1_WREXEC_B64) +s_movrels_b32 = functools.partial(SOP1, SOP1Op.S_MOVRELS_B32) +s_movrels_b64 = functools.partial(SOP1, SOP1Op.S_MOVRELS_B64) +s_movreld_b32 = functools.partial(SOP1, SOP1Op.S_MOVRELD_B32) +s_movreld_b64 = functools.partial(SOP1, SOP1Op.S_MOVRELD_B64) +s_movrelsd_2_b32 = functools.partial(SOP1, SOP1Op.S_MOVRELSD_2_B32) +s_getpc_b64 = functools.partial(SOP1, SOP1Op.S_GETPC_B64) +s_setpc_b64 = functools.partial(SOP1, SOP1Op.S_SETPC_B64) +s_swappc_b64 = functools.partial(SOP1, SOP1Op.S_SWAPPC_B64) +s_rfe_b64 = functools.partial(SOP1, SOP1Op.S_RFE_B64) +s_sendmsg_rtn_b32 = functools.partial(SOP1, SOP1Op.S_SENDMSG_RTN_B32) +s_sendmsg_rtn_b64 = functools.partial(SOP1, SOP1Op.S_SENDMSG_RTN_B64) +s_barrier_signal = functools.partial(SOP1, SOP1Op.S_BARRIER_SIGNAL) +s_barrier_signal_isfirst = functools.partial(SOP1, SOP1Op.S_BARRIER_SIGNAL_ISFIRST) +s_get_barrier_state = functools.partial(SOP1, SOP1Op.S_GET_BARRIER_STATE) +s_alloc_vgpr = functools.partial(SOP1, SOP1Op.S_ALLOC_VGPR) +s_sleep_var = functools.partial(SOP1, SOP1Op.S_SLEEP_VAR) +s_ceil_f32 = functools.partial(SOP1, SOP1Op.S_CEIL_F32) +s_floor_f32 = functools.partial(SOP1, SOP1Op.S_FLOOR_F32) +s_trunc_f32 = functools.partial(SOP1, SOP1Op.S_TRUNC_F32) +s_rndne_f32 = functools.partial(SOP1, SOP1Op.S_RNDNE_F32) +s_cvt_f32_i32 = functools.partial(SOP1, SOP1Op.S_CVT_F32_I32) +s_cvt_f32_u32 = functools.partial(SOP1, SOP1Op.S_CVT_F32_U32) +s_cvt_i32_f32 = functools.partial(SOP1, SOP1Op.S_CVT_I32_F32) +s_cvt_u32_f32 = functools.partial(SOP1, SOP1Op.S_CVT_U32_F32) +s_cvt_f16_f32 = functools.partial(SOP1, SOP1Op.S_CVT_F16_F32) +s_cvt_f32_f16 = functools.partial(SOP1, SOP1Op.S_CVT_F32_F16) +s_cvt_hi_f32_f16 = functools.partial(SOP1, SOP1Op.S_CVT_HI_F32_F16) +s_ceil_f16 = functools.partial(SOP1, SOP1Op.S_CEIL_F16) +s_floor_f16 = functools.partial(SOP1, SOP1Op.S_FLOOR_F16) +s_trunc_f16 = functools.partial(SOP1, SOP1Op.S_TRUNC_F16) +s_rndne_f16 = functools.partial(SOP1, SOP1Op.S_RNDNE_F16) +s_add_co_u32 = functools.partial(SOP2, SOP2Op.S_ADD_CO_U32) +s_sub_co_u32 = functools.partial(SOP2, SOP2Op.S_SUB_CO_U32) +s_add_co_i32 = functools.partial(SOP2, SOP2Op.S_ADD_CO_I32) +s_sub_co_i32 = functools.partial(SOP2, SOP2Op.S_SUB_CO_I32) +s_add_co_ci_u32 = functools.partial(SOP2, SOP2Op.S_ADD_CO_CI_U32) +s_sub_co_ci_u32 = functools.partial(SOP2, SOP2Op.S_SUB_CO_CI_U32) +s_absdiff_i32 = functools.partial(SOP2, SOP2Op.S_ABSDIFF_I32) 
+s_lshl_b32 = functools.partial(SOP2, SOP2Op.S_LSHL_B32) +s_lshl_b64 = functools.partial(SOP2, SOP2Op.S_LSHL_B64) +s_lshr_b32 = functools.partial(SOP2, SOP2Op.S_LSHR_B32) +s_lshr_b64 = functools.partial(SOP2, SOP2Op.S_LSHR_B64) +s_ashr_i32 = functools.partial(SOP2, SOP2Op.S_ASHR_I32) +s_ashr_i64 = functools.partial(SOP2, SOP2Op.S_ASHR_I64) +s_lshl1_add_u32 = functools.partial(SOP2, SOP2Op.S_LSHL1_ADD_U32) +s_lshl2_add_u32 = functools.partial(SOP2, SOP2Op.S_LSHL2_ADD_U32) +s_lshl3_add_u32 = functools.partial(SOP2, SOP2Op.S_LSHL3_ADD_U32) +s_lshl4_add_u32 = functools.partial(SOP2, SOP2Op.S_LSHL4_ADD_U32) +s_min_i32 = functools.partial(SOP2, SOP2Op.S_MIN_I32) +s_min_u32 = functools.partial(SOP2, SOP2Op.S_MIN_U32) +s_max_i32 = functools.partial(SOP2, SOP2Op.S_MAX_I32) +s_max_u32 = functools.partial(SOP2, SOP2Op.S_MAX_U32) +s_and_b32 = functools.partial(SOP2, SOP2Op.S_AND_B32) +s_and_b64 = functools.partial(SOP2, SOP2Op.S_AND_B64) +s_or_b32 = functools.partial(SOP2, SOP2Op.S_OR_B32) +s_or_b64 = functools.partial(SOP2, SOP2Op.S_OR_B64) +s_xor_b32 = functools.partial(SOP2, SOP2Op.S_XOR_B32) +s_xor_b64 = functools.partial(SOP2, SOP2Op.S_XOR_B64) +s_nand_b32 = functools.partial(SOP2, SOP2Op.S_NAND_B32) +s_nand_b64 = functools.partial(SOP2, SOP2Op.S_NAND_B64) +s_nor_b32 = functools.partial(SOP2, SOP2Op.S_NOR_B32) +s_nor_b64 = functools.partial(SOP2, SOP2Op.S_NOR_B64) +s_xnor_b32 = functools.partial(SOP2, SOP2Op.S_XNOR_B32) +s_xnor_b64 = functools.partial(SOP2, SOP2Op.S_XNOR_B64) +s_and_not1_b32 = functools.partial(SOP2, SOP2Op.S_AND_NOT1_B32) +s_and_not1_b64 = functools.partial(SOP2, SOP2Op.S_AND_NOT1_B64) +s_or_not1_b32 = functools.partial(SOP2, SOP2Op.S_OR_NOT1_B32) +s_or_not1_b64 = functools.partial(SOP2, SOP2Op.S_OR_NOT1_B64) +s_bfe_u32 = functools.partial(SOP2, SOP2Op.S_BFE_U32) +s_bfe_i32 = functools.partial(SOP2, SOP2Op.S_BFE_I32) +s_bfe_u64 = functools.partial(SOP2, SOP2Op.S_BFE_U64) +s_bfe_i64 = functools.partial(SOP2, SOP2Op.S_BFE_I64) +s_bfm_b32 = functools.partial(SOP2, SOP2Op.S_BFM_B32) +s_bfm_b64 = functools.partial(SOP2, SOP2Op.S_BFM_B64) +s_mul_i32 = functools.partial(SOP2, SOP2Op.S_MUL_I32) +s_mul_hi_u32 = functools.partial(SOP2, SOP2Op.S_MUL_HI_U32) +s_mul_hi_i32 = functools.partial(SOP2, SOP2Op.S_MUL_HI_I32) +s_cselect_b32 = functools.partial(SOP2, SOP2Op.S_CSELECT_B32) +s_cselect_b64 = functools.partial(SOP2, SOP2Op.S_CSELECT_B64) +s_pack_ll_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_LL_B32_B16) +s_pack_lh_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_LH_B32_B16) +s_pack_hh_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_HH_B32_B16) +s_pack_hl_b32_b16 = functools.partial(SOP2, SOP2Op.S_PACK_HL_B32_B16) +s_add_f32 = functools.partial(SOP2, SOP2Op.S_ADD_F32) +s_sub_f32 = functools.partial(SOP2, SOP2Op.S_SUB_F32) +s_min_num_f32 = functools.partial(SOP2, SOP2Op.S_MIN_NUM_F32) +s_max_num_f32 = functools.partial(SOP2, SOP2Op.S_MAX_NUM_F32) +s_mul_f32 = functools.partial(SOP2, SOP2Op.S_MUL_F32) +s_fmaak_f32 = functools.partial(SOP2, SOP2Op.S_FMAAK_F32) +s_fmamk_f32 = functools.partial(SOP2, SOP2Op.S_FMAMK_F32) +s_fmac_f32 = functools.partial(SOP2, SOP2Op.S_FMAC_F32) +s_cvt_pk_rtz_f16_f32 = functools.partial(SOP2, SOP2Op.S_CVT_PK_RTZ_F16_F32) +s_add_f16 = functools.partial(SOP2, SOP2Op.S_ADD_F16) +s_sub_f16 = functools.partial(SOP2, SOP2Op.S_SUB_F16) +s_min_num_f16 = functools.partial(SOP2, SOP2Op.S_MIN_NUM_F16) +s_max_num_f16 = functools.partial(SOP2, SOP2Op.S_MAX_NUM_F16) +s_mul_f16 = functools.partial(SOP2, SOP2Op.S_MUL_F16) +s_fmac_f16 = functools.partial(SOP2, 
SOP2Op.S_FMAC_F16) +s_minimum_f32 = functools.partial(SOP2, SOP2Op.S_MINIMUM_F32) +s_maximum_f32 = functools.partial(SOP2, SOP2Op.S_MAXIMUM_F32) +s_minimum_f16 = functools.partial(SOP2, SOP2Op.S_MINIMUM_F16) +s_maximum_f16 = functools.partial(SOP2, SOP2Op.S_MAXIMUM_F16) +s_add_nc_u64 = functools.partial(SOP2, SOP2Op.S_ADD_NC_U64) +s_sub_nc_u64 = functools.partial(SOP2, SOP2Op.S_SUB_NC_U64) +s_mul_u64 = functools.partial(SOP2, SOP2Op.S_MUL_U64) +s_cmp_eq_i32 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_I32) +s_cmp_lg_i32 = functools.partial(SOPC, SOPCOp.S_CMP_LG_I32) +s_cmp_gt_i32 = functools.partial(SOPC, SOPCOp.S_CMP_GT_I32) +s_cmp_ge_i32 = functools.partial(SOPC, SOPCOp.S_CMP_GE_I32) +s_cmp_lt_i32 = functools.partial(SOPC, SOPCOp.S_CMP_LT_I32) +s_cmp_le_i32 = functools.partial(SOPC, SOPCOp.S_CMP_LE_I32) +s_cmp_eq_u32 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_U32) +s_cmp_lg_u32 = functools.partial(SOPC, SOPCOp.S_CMP_LG_U32) +s_cmp_gt_u32 = functools.partial(SOPC, SOPCOp.S_CMP_GT_U32) +s_cmp_ge_u32 = functools.partial(SOPC, SOPCOp.S_CMP_GE_U32) +s_cmp_lt_u32 = functools.partial(SOPC, SOPCOp.S_CMP_LT_U32) +s_cmp_le_u32 = functools.partial(SOPC, SOPCOp.S_CMP_LE_U32) +s_bitcmp0_b32 = functools.partial(SOPC, SOPCOp.S_BITCMP0_B32) +s_bitcmp1_b32 = functools.partial(SOPC, SOPCOp.S_BITCMP1_B32) +s_bitcmp0_b64 = functools.partial(SOPC, SOPCOp.S_BITCMP0_B64) +s_bitcmp1_b64 = functools.partial(SOPC, SOPCOp.S_BITCMP1_B64) +s_cmp_eq_u64 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_U64) +s_cmp_lg_u64 = functools.partial(SOPC, SOPCOp.S_CMP_LG_U64) +s_cmp_lt_f32 = functools.partial(SOPC, SOPCOp.S_CMP_LT_F32) +s_cmp_eq_f32 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_F32) +s_cmp_le_f32 = functools.partial(SOPC, SOPCOp.S_CMP_LE_F32) +s_cmp_gt_f32 = functools.partial(SOPC, SOPCOp.S_CMP_GT_F32) +s_cmp_lg_f32 = functools.partial(SOPC, SOPCOp.S_CMP_LG_F32) +s_cmp_ge_f32 = functools.partial(SOPC, SOPCOp.S_CMP_GE_F32) +s_cmp_o_f32 = functools.partial(SOPC, SOPCOp.S_CMP_O_F32) +s_cmp_u_f32 = functools.partial(SOPC, SOPCOp.S_CMP_U_F32) +s_cmp_nge_f32 = functools.partial(SOPC, SOPCOp.S_CMP_NGE_F32) +s_cmp_nlg_f32 = functools.partial(SOPC, SOPCOp.S_CMP_NLG_F32) +s_cmp_ngt_f32 = functools.partial(SOPC, SOPCOp.S_CMP_NGT_F32) +s_cmp_nle_f32 = functools.partial(SOPC, SOPCOp.S_CMP_NLE_F32) +s_cmp_neq_f32 = functools.partial(SOPC, SOPCOp.S_CMP_NEQ_F32) +s_cmp_nlt_f32 = functools.partial(SOPC, SOPCOp.S_CMP_NLT_F32) +s_cmp_lt_f16 = functools.partial(SOPC, SOPCOp.S_CMP_LT_F16) +s_cmp_eq_f16 = functools.partial(SOPC, SOPCOp.S_CMP_EQ_F16) +s_cmp_le_f16 = functools.partial(SOPC, SOPCOp.S_CMP_LE_F16) +s_cmp_gt_f16 = functools.partial(SOPC, SOPCOp.S_CMP_GT_F16) +s_cmp_lg_f16 = functools.partial(SOPC, SOPCOp.S_CMP_LG_F16) +s_cmp_ge_f16 = functools.partial(SOPC, SOPCOp.S_CMP_GE_F16) +s_cmp_o_f16 = functools.partial(SOPC, SOPCOp.S_CMP_O_F16) +s_cmp_u_f16 = functools.partial(SOPC, SOPCOp.S_CMP_U_F16) +s_cmp_nge_f16 = functools.partial(SOPC, SOPCOp.S_CMP_NGE_F16) +s_cmp_nlg_f16 = functools.partial(SOPC, SOPCOp.S_CMP_NLG_F16) +s_cmp_ngt_f16 = functools.partial(SOPC, SOPCOp.S_CMP_NGT_F16) +s_cmp_nle_f16 = functools.partial(SOPC, SOPCOp.S_CMP_NLE_F16) +s_cmp_neq_f16 = functools.partial(SOPC, SOPCOp.S_CMP_NEQ_F16) +s_cmp_nlt_f16 = functools.partial(SOPC, SOPCOp.S_CMP_NLT_F16) +s_movk_i32 = functools.partial(SOPK, SOPKOp.S_MOVK_I32) +s_version = functools.partial(SOPK, SOPKOp.S_VERSION) +s_cmovk_i32 = functools.partial(SOPK, SOPKOp.S_CMOVK_I32) +s_addk_co_i32 = functools.partial(SOPK, SOPKOp.S_ADDK_CO_I32) +s_mulk_i32 = functools.partial(SOPK, 
SOPKOp.S_MULK_I32) +s_getreg_b32 = functools.partial(SOPK, SOPKOp.S_GETREG_B32) +s_setreg_b32 = functools.partial(SOPK, SOPKOp.S_SETREG_B32) +s_setreg_imm32_b32 = functools.partial(SOPK, SOPKOp.S_SETREG_IMM32_B32) +s_call_b64 = functools.partial(SOPK, SOPKOp.S_CALL_B64) +s_nop = functools.partial(SOPP, SOPPOp.S_NOP) +s_setkill = functools.partial(SOPP, SOPPOp.S_SETKILL) +s_sethalt = functools.partial(SOPP, SOPPOp.S_SETHALT) +s_sleep = functools.partial(SOPP, SOPPOp.S_SLEEP) +s_clause = functools.partial(SOPP, SOPPOp.S_CLAUSE) +s_delay_alu = functools.partial(SOPP, SOPPOp.S_DELAY_ALU) +s_wait_alu = functools.partial(SOPP, SOPPOp.S_WAIT_ALU) +s_waitcnt = functools.partial(SOPP, SOPPOp.S_WAITCNT) +s_wait_idle = functools.partial(SOPP, SOPPOp.S_WAIT_IDLE) +s_wait_event = functools.partial(SOPP, SOPPOp.S_WAIT_EVENT) +s_trap = functools.partial(SOPP, SOPPOp.S_TRAP) +s_round_mode = functools.partial(SOPP, SOPPOp.S_ROUND_MODE) +s_denorm_mode = functools.partial(SOPP, SOPPOp.S_DENORM_MODE) +s_barrier_wait = functools.partial(SOPP, SOPPOp.S_BARRIER_WAIT) +s_code_end = functools.partial(SOPP, SOPPOp.S_CODE_END) +s_branch = functools.partial(SOPP, SOPPOp.S_BRANCH) +s_cbranch_scc0 = functools.partial(SOPP, SOPPOp.S_CBRANCH_SCC0) +s_cbranch_scc1 = functools.partial(SOPP, SOPPOp.S_CBRANCH_SCC1) +s_cbranch_vccz = functools.partial(SOPP, SOPPOp.S_CBRANCH_VCCZ) +s_cbranch_vccnz = functools.partial(SOPP, SOPPOp.S_CBRANCH_VCCNZ) +s_cbranch_execz = functools.partial(SOPP, SOPPOp.S_CBRANCH_EXECZ) +s_cbranch_execnz = functools.partial(SOPP, SOPPOp.S_CBRANCH_EXECNZ) +s_endpgm = functools.partial(SOPP, SOPPOp.S_ENDPGM) +s_endpgm_saved = functools.partial(SOPP, SOPPOp.S_ENDPGM_SAVED) +s_wakeup = functools.partial(SOPP, SOPPOp.S_WAKEUP) +s_setprio = functools.partial(SOPP, SOPPOp.S_SETPRIO) +s_sendmsg = functools.partial(SOPP, SOPPOp.S_SENDMSG) +s_sendmsghalt = functools.partial(SOPP, SOPPOp.S_SENDMSGHALT) +s_incperflevel = functools.partial(SOPP, SOPPOp.S_INCPERFLEVEL) +s_decperflevel = functools.partial(SOPP, SOPPOp.S_DECPERFLEVEL) +s_icache_inv = functools.partial(SOPP, SOPPOp.S_ICACHE_INV) +s_wait_loadcnt = functools.partial(SOPP, SOPPOp.S_WAIT_LOADCNT) +s_wait_storecnt = functools.partial(SOPP, SOPPOp.S_WAIT_STORECNT) +s_wait_samplecnt = functools.partial(SOPP, SOPPOp.S_WAIT_SAMPLECNT) +s_wait_bvhcnt = functools.partial(SOPP, SOPPOp.S_WAIT_BVHCNT) +s_wait_expcnt = functools.partial(SOPP, SOPPOp.S_WAIT_EXPCNT) +s_wait_dscnt = functools.partial(SOPP, SOPPOp.S_WAIT_DSCNT) +s_wait_kmcnt = functools.partial(SOPP, SOPPOp.S_WAIT_KMCNT) +s_wait_loadcnt_dscnt = functools.partial(SOPP, SOPPOp.S_WAIT_LOADCNT_DSCNT) +s_wait_storecnt_dscnt = functools.partial(SOPP, SOPPOp.S_WAIT_STORECNT_DSCNT) +buffer_load_format_x = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_FORMAT_X) +buffer_load_format_xy = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_FORMAT_XY) +buffer_load_format_xyz = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_FORMAT_XYZ) +buffer_load_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_FORMAT_XYZW) +buffer_store_format_x = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_FORMAT_X) +buffer_store_format_xy = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_FORMAT_XY) +buffer_store_format_xyz = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_FORMAT_XYZ) +buffer_store_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_FORMAT_XYZW) +buffer_load_d16_format_x = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_FORMAT_X) +buffer_load_d16_format_xy = functools.partial(VBUFFER, 
VBUFFEROp.BUFFER_LOAD_D16_FORMAT_XY) +buffer_load_d16_format_xyz = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_FORMAT_XYZ) +buffer_load_d16_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_FORMAT_XYZW) +buffer_store_d16_format_x = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_D16_FORMAT_X) +buffer_store_d16_format_xy = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_D16_FORMAT_XY) +buffer_store_d16_format_xyz = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_D16_FORMAT_XYZ) +buffer_store_d16_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_D16_FORMAT_XYZW) +buffer_load_u8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_U8) +buffer_load_i8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_I8) +buffer_load_u16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_U16) +buffer_load_i16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_I16) +buffer_load_b32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_B32) +buffer_load_b64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_B64) +buffer_load_b96 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_B96) +buffer_load_b128 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_B128) +buffer_store_b8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_B8) +buffer_store_b16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_B16) +buffer_store_b32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_B32) +buffer_store_b64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_B64) +buffer_store_b96 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_B96) +buffer_store_b128 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_B128) +buffer_load_d16_u8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_U8) +buffer_load_d16_i8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_I8) +buffer_load_d16_b16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_B16) +buffer_load_d16_hi_u8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_HI_U8) +buffer_load_d16_hi_i8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_HI_I8) +buffer_load_d16_hi_b16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_HI_B16) +buffer_store_d16_hi_b8 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_D16_HI_B8) +buffer_store_d16_hi_b16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_D16_HI_B16) +buffer_load_d16_hi_format_x = functools.partial(VBUFFER, VBUFFEROp.BUFFER_LOAD_D16_HI_FORMAT_X) +buffer_store_d16_hi_format_x = functools.partial(VBUFFER, VBUFFEROp.BUFFER_STORE_D16_HI_FORMAT_X) +buffer_atomic_swap_b32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_SWAP_B32) +buffer_atomic_cmpswap_b32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_CMPSWAP_B32) +buffer_atomic_add_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_ADD_U32) +buffer_atomic_sub_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_SUB_U32) +buffer_atomic_sub_clamp_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_SUB_CLAMP_U32) +buffer_atomic_min_i32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MIN_I32) +buffer_atomic_min_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MIN_U32) +buffer_atomic_max_i32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MAX_I32) +buffer_atomic_max_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MAX_U32) +buffer_atomic_and_b32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_AND_B32) +buffer_atomic_or_b32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_OR_B32) +buffer_atomic_xor_b32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_XOR_B32) 
+buffer_atomic_inc_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_INC_U32) +buffer_atomic_dec_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_DEC_U32) +buffer_atomic_swap_b64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_SWAP_B64) +buffer_atomic_cmpswap_b64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_CMPSWAP_B64) +buffer_atomic_add_u64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_ADD_U64) +buffer_atomic_sub_u64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_SUB_U64) +buffer_atomic_min_i64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MIN_I64) +buffer_atomic_min_u64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MIN_U64) +buffer_atomic_max_i64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MAX_I64) +buffer_atomic_max_u64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MAX_U64) +buffer_atomic_and_b64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_AND_B64) +buffer_atomic_or_b64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_OR_B64) +buffer_atomic_xor_b64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_XOR_B64) +buffer_atomic_inc_u64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_INC_U64) +buffer_atomic_dec_u64 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_DEC_U64) +buffer_atomic_cond_sub_u32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_COND_SUB_U32) +buffer_atomic_min_num_f32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MIN_NUM_F32) +buffer_atomic_max_num_f32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_MAX_NUM_F32) +buffer_atomic_add_f32 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_ADD_F32) +buffer_atomic_pk_add_f16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_PK_ADD_F16) +buffer_atomic_pk_add_bf16 = functools.partial(VBUFFER, VBUFFEROp.BUFFER_ATOMIC_PK_ADD_BF16) +tbuffer_load_format_x = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_FORMAT_X) +tbuffer_load_format_xy = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_FORMAT_XY) +tbuffer_load_format_xyz = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_FORMAT_XYZ) +tbuffer_load_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_FORMAT_XYZW) +tbuffer_store_format_x = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_FORMAT_X) +tbuffer_store_format_xy = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_FORMAT_XY) +tbuffer_store_format_xyz = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_FORMAT_XYZ) +tbuffer_store_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_FORMAT_XYZW) +tbuffer_load_d16_format_x = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_D16_FORMAT_X) +tbuffer_load_d16_format_xy = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_D16_FORMAT_XY) +tbuffer_load_d16_format_xyz = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_D16_FORMAT_XYZ) +tbuffer_load_d16_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_LOAD_D16_FORMAT_XYZW) +tbuffer_store_d16_format_x = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_D16_FORMAT_X) +tbuffer_store_d16_format_xy = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_D16_FORMAT_XY) +tbuffer_store_d16_format_xyz = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_D16_FORMAT_XYZ) +tbuffer_store_d16_format_xyzw = functools.partial(VBUFFER, VBUFFEROp.TBUFFER_STORE_D16_FORMAT_XYZW) +ds_param_load = functools.partial(VDSDIR, VDSDIROp.DS_PARAM_LOAD) +ds_direct_load = functools.partial(VDSDIR, VDSDIROp.DS_DIRECT_LOAD) +v_interp_p10_f32 = functools.partial(VINTERP, VINTERPOp.V_INTERP_P10_F32) +v_interp_p2_f32 = 
functools.partial(VINTERP, VINTERPOp.V_INTERP_P2_F32) +v_interp_p10_f16_f32 = functools.partial(VINTERP, VINTERPOp.V_INTERP_P10_F16_F32) +v_interp_p2_f16_f32 = functools.partial(VINTERP, VINTERPOp.V_INTERP_P2_F16_F32) +v_interp_p10_rtz_f16_f32 = functools.partial(VINTERP, VINTERPOp.V_INTERP_P10_RTZ_F16_F32) +v_interp_p2_rtz_f16_f32 = functools.partial(VINTERP, VINTERPOp.V_INTERP_P2_RTZ_F16_F32) +v_nop_e32 = functools.partial(VOP1, VOP1Op.V_NOP) +v_mov_b32_e32 = functools.partial(VOP1, VOP1Op.V_MOV_B32) +v_readfirstlane_b32_e32 = functools.partial(VOP1, VOP1Op.V_READFIRSTLANE_B32) +v_cvt_i32_f64_e32 = functools.partial(VOP1, VOP1Op.V_CVT_I32_F64) +v_cvt_f64_i32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F64_I32) +v_cvt_f32_i32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_I32) +v_cvt_f32_u32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_U32) +v_cvt_u32_f32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_U32_F32) +v_cvt_i32_f32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_I32_F32) +v_cvt_f16_f32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F16_F32) +v_cvt_f32_f16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_F16) +v_cvt_nearest_i32_f32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_NEAREST_I32_F32) +v_cvt_floor_i32_f32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_FLOOR_I32_F32) +v_cvt_off_f32_i4_e32 = functools.partial(VOP1, VOP1Op.V_CVT_OFF_F32_I4) +v_cvt_f32_f64_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_F64) +v_cvt_f64_f32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F64_F32) +v_cvt_f32_ubyte0_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_UBYTE0) +v_cvt_f32_ubyte1_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_UBYTE1) +v_cvt_f32_ubyte2_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_UBYTE2) +v_cvt_f32_ubyte3_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_UBYTE3) +v_cvt_u32_f64_e32 = functools.partial(VOP1, VOP1Op.V_CVT_U32_F64) +v_cvt_f64_u32_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F64_U32) +v_trunc_f64_e32 = functools.partial(VOP1, VOP1Op.V_TRUNC_F64) +v_ceil_f64_e32 = functools.partial(VOP1, VOP1Op.V_CEIL_F64) +v_rndne_f64_e32 = functools.partial(VOP1, VOP1Op.V_RNDNE_F64) +v_floor_f64_e32 = functools.partial(VOP1, VOP1Op.V_FLOOR_F64) +v_pipeflush_e32 = functools.partial(VOP1, VOP1Op.V_PIPEFLUSH) +v_mov_b16_e32 = functools.partial(VOP1, VOP1Op.V_MOV_B16) +v_fract_f32_e32 = functools.partial(VOP1, VOP1Op.V_FRACT_F32) +v_trunc_f32_e32 = functools.partial(VOP1, VOP1Op.V_TRUNC_F32) +v_ceil_f32_e32 = functools.partial(VOP1, VOP1Op.V_CEIL_F32) +v_rndne_f32_e32 = functools.partial(VOP1, VOP1Op.V_RNDNE_F32) +v_floor_f32_e32 = functools.partial(VOP1, VOP1Op.V_FLOOR_F32) +v_exp_f32_e32 = functools.partial(VOP1, VOP1Op.V_EXP_F32) +v_log_f32_e32 = functools.partial(VOP1, VOP1Op.V_LOG_F32) +v_rcp_f32_e32 = functools.partial(VOP1, VOP1Op.V_RCP_F32) +v_rcp_iflag_f32_e32 = functools.partial(VOP1, VOP1Op.V_RCP_IFLAG_F32) +v_rsq_f32_e32 = functools.partial(VOP1, VOP1Op.V_RSQ_F32) +v_rcp_f64_e32 = functools.partial(VOP1, VOP1Op.V_RCP_F64) +v_rsq_f64_e32 = functools.partial(VOP1, VOP1Op.V_RSQ_F64) +v_sqrt_f32_e32 = functools.partial(VOP1, VOP1Op.V_SQRT_F32) +v_sqrt_f64_e32 = functools.partial(VOP1, VOP1Op.V_SQRT_F64) +v_sin_f32_e32 = functools.partial(VOP1, VOP1Op.V_SIN_F32) +v_cos_f32_e32 = functools.partial(VOP1, VOP1Op.V_COS_F32) +v_not_b32_e32 = functools.partial(VOP1, VOP1Op.V_NOT_B32) +v_bfrev_b32_e32 = functools.partial(VOP1, VOP1Op.V_BFREV_B32) +v_clz_i32_u32_e32 = functools.partial(VOP1, VOP1Op.V_CLZ_I32_U32) +v_ctz_i32_b32_e32 = functools.partial(VOP1, VOP1Op.V_CTZ_I32_B32) +v_cls_i32_e32 = 
functools.partial(VOP1, VOP1Op.V_CLS_I32) +v_frexp_exp_i32_f64_e32 = functools.partial(VOP1, VOP1Op.V_FREXP_EXP_I32_F64) +v_frexp_mant_f64_e32 = functools.partial(VOP1, VOP1Op.V_FREXP_MANT_F64) +v_fract_f64_e32 = functools.partial(VOP1, VOP1Op.V_FRACT_F64) +v_frexp_exp_i32_f32_e32 = functools.partial(VOP1, VOP1Op.V_FREXP_EXP_I32_F32) +v_frexp_mant_f32_e32 = functools.partial(VOP1, VOP1Op.V_FREXP_MANT_F32) +v_movreld_b32_e32 = functools.partial(VOP1, VOP1Op.V_MOVRELD_B32) +v_movrels_b32_e32 = functools.partial(VOP1, VOP1Op.V_MOVRELS_B32) +v_movrelsd_b32_e32 = functools.partial(VOP1, VOP1Op.V_MOVRELSD_B32) +v_movrelsd_2_b32_e32 = functools.partial(VOP1, VOP1Op.V_MOVRELSD_2_B32) +v_cvt_f16_u16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F16_U16) +v_cvt_f16_i16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F16_I16) +v_cvt_u16_f16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_U16_F16) +v_cvt_i16_f16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_I16_F16) +v_rcp_f16_e32 = functools.partial(VOP1, VOP1Op.V_RCP_F16) +v_sqrt_f16_e32 = functools.partial(VOP1, VOP1Op.V_SQRT_F16) +v_rsq_f16_e32 = functools.partial(VOP1, VOP1Op.V_RSQ_F16) +v_log_f16_e32 = functools.partial(VOP1, VOP1Op.V_LOG_F16) +v_exp_f16_e32 = functools.partial(VOP1, VOP1Op.V_EXP_F16) +v_frexp_mant_f16_e32 = functools.partial(VOP1, VOP1Op.V_FREXP_MANT_F16) +v_frexp_exp_i16_f16_e32 = functools.partial(VOP1, VOP1Op.V_FREXP_EXP_I16_F16) +v_floor_f16_e32 = functools.partial(VOP1, VOP1Op.V_FLOOR_F16) +v_ceil_f16_e32 = functools.partial(VOP1, VOP1Op.V_CEIL_F16) +v_trunc_f16_e32 = functools.partial(VOP1, VOP1Op.V_TRUNC_F16) +v_rndne_f16_e32 = functools.partial(VOP1, VOP1Op.V_RNDNE_F16) +v_fract_f16_e32 = functools.partial(VOP1, VOP1Op.V_FRACT_F16) +v_sin_f16_e32 = functools.partial(VOP1, VOP1Op.V_SIN_F16) +v_cos_f16_e32 = functools.partial(VOP1, VOP1Op.V_COS_F16) +v_sat_pk_u8_i16_e32 = functools.partial(VOP1, VOP1Op.V_SAT_PK_U8_I16) +v_cvt_norm_i16_f16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_NORM_I16_F16) +v_cvt_norm_u16_f16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_NORM_U16_F16) +v_swap_b32_e32 = functools.partial(VOP1, VOP1Op.V_SWAP_B32) +v_swap_b16_e32 = functools.partial(VOP1, VOP1Op.V_SWAP_B16) +v_permlane64_b32_e32 = functools.partial(VOP1, VOP1Op.V_PERMLANE64_B32) +v_swaprel_b32_e32 = functools.partial(VOP1, VOP1Op.V_SWAPREL_B32) +v_not_b16_e32 = functools.partial(VOP1, VOP1Op.V_NOT_B16) +v_cvt_i32_i16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_I32_I16) +v_cvt_u32_u16_e32 = functools.partial(VOP1, VOP1Op.V_CVT_U32_U16) +v_cvt_f32_fp8_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_FP8) +v_cvt_f32_bf8_e32 = functools.partial(VOP1, VOP1Op.V_CVT_F32_BF8) +v_cvt_pk_f32_fp8_e32 = functools.partial(VOP1, VOP1Op.V_CVT_PK_F32_FP8) +v_cvt_pk_f32_bf8_e32 = functools.partial(VOP1, VOP1Op.V_CVT_PK_F32_BF8) +v_cndmask_b32_e32 = functools.partial(VOP2, VOP2Op.V_CNDMASK_B32) +v_add_f64_e32 = functools.partial(VOP2, VOP2Op.V_ADD_F64) +v_add_f32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_F32) +v_sub_f32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F32) +v_subrev_f32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_F32) +v_mul_f64_e32 = functools.partial(VOP2, VOP2Op.V_MUL_F64) +v_mul_dx9_zero_f32_e32 = functools.partial(VOP2, VOP2Op.V_MUL_DX9_ZERO_F32) +v_mul_f32_e32 = functools.partial(VOP2, VOP2Op.V_MUL_F32) +v_mul_i32_i24_e32 = functools.partial(VOP2, VOP2Op.V_MUL_I32_I24) +v_mul_hi_i32_i24_e32 = functools.partial(VOP2, VOP2Op.V_MUL_HI_I32_I24) +v_mul_u32_u24_e32 = functools.partial(VOP2, VOP2Op.V_MUL_U32_U24) +v_mul_hi_u32_u24_e32 = functools.partial(VOP2, 
VOP2Op.V_MUL_HI_U32_U24) +v_min_num_f64_e32 = functools.partial(VOP2, VOP2Op.V_MIN_NUM_F64) +v_max_num_f64_e32 = functools.partial(VOP2, VOP2Op.V_MAX_NUM_F64) +v_min_i32_e32 = functools.partial(VOP2, VOP2Op.V_MIN_I32) +v_max_i32_e32 = functools.partial(VOP2, VOP2Op.V_MAX_I32) +v_min_u32_e32 = functools.partial(VOP2, VOP2Op.V_MIN_U32) +v_max_u32_e32 = functools.partial(VOP2, VOP2Op.V_MAX_U32) +v_min_num_f32_e32 = functools.partial(VOP2, VOP2Op.V_MIN_NUM_F32) +v_max_num_f32_e32 = functools.partial(VOP2, VOP2Op.V_MAX_NUM_F32) +v_lshlrev_b32_e32 = functools.partial(VOP2, VOP2Op.V_LSHLREV_B32) +v_lshrrev_b32_e32 = functools.partial(VOP2, VOP2Op.V_LSHRREV_B32) +v_ashrrev_i32_e32 = functools.partial(VOP2, VOP2Op.V_ASHRREV_I32) +v_and_b32_e32 = functools.partial(VOP2, VOP2Op.V_AND_B32) +v_or_b32_e32 = functools.partial(VOP2, VOP2Op.V_OR_B32) +v_xor_b32_e32 = functools.partial(VOP2, VOP2Op.V_XOR_B32) +v_xnor_b32_e32 = functools.partial(VOP2, VOP2Op.V_XNOR_B32) +v_lshlrev_b64_e32 = functools.partial(VOP2, VOP2Op.V_LSHLREV_B64) +v_add_co_ci_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_CO_CI_U32) +v_sub_co_ci_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_CO_CI_U32) +v_subrev_co_ci_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_CO_CI_U32) +v_add_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_ADD_NC_U32) +v_sub_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUB_NC_U32) +v_subrev_nc_u32_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_NC_U32) +v_fmac_f32_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F32) +def v_fmamk_f32_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F32, vdst, src0, vsrc1, literal=K) +def v_fmaak_f32_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F32, vdst, src0, vsrc1, literal=K) +v_cvt_pk_rtz_f16_f32_e32 = functools.partial(VOP2, VOP2Op.V_CVT_PK_RTZ_F16_F32) +v_min_num_f16_e32 = functools.partial(VOP2, VOP2Op.V_MIN_NUM_F16) +v_max_num_f16_e32 = functools.partial(VOP2, VOP2Op.V_MAX_NUM_F16) +v_add_f16_e32 = functools.partial(VOP2, VOP2Op.V_ADD_F16) +v_sub_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUB_F16) +v_subrev_f16_e32 = functools.partial(VOP2, VOP2Op.V_SUBREV_F16) +v_mul_f16_e32 = functools.partial(VOP2, VOP2Op.V_MUL_F16) +v_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_FMAC_F16) +def v_fmamk_f16_e32(vdst, src0, K, vsrc1): return VOP2(VOP2Op.V_FMAMK_F16, vdst, src0, vsrc1, literal=K) +def v_fmaak_f16_e32(vdst, src0, vsrc1, K): return VOP2(VOP2Op.V_FMAAK_F16, vdst, src0, vsrc1, literal=K) +v_ldexp_f16_e32 = functools.partial(VOP2, VOP2Op.V_LDEXP_F16) +v_pk_fmac_f16_e32 = functools.partial(VOP2, VOP2Op.V_PK_FMAC_F16) +v_cmp_lt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_F16) +v_cmp_eq_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_F16) +v_cmp_le_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_F16) +v_cmp_gt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_F16) +v_cmp_lg_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LG_F16) +v_cmp_ge_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_F16) +v_cmp_o_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_O_F16) +v_cmp_u_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_U_F16) +v_cmp_nge_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NGE_F16) +v_cmp_nlg_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLG_F16) +v_cmp_ngt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NGT_F16) +v_cmp_nle_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLE_F16) +v_cmp_neq_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NEQ_F16) +v_cmp_nlt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLT_F16) +v_cmp_lt_f32_e64 = functools.partial(VOP3, 
VOP3Op.V_CMP_LT_F32) +v_cmp_eq_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_F32) +v_cmp_le_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_F32) +v_cmp_gt_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_F32) +v_cmp_lg_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LG_F32) +v_cmp_ge_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_F32) +v_cmp_o_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_O_F32) +v_cmp_u_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_U_F32) +v_cmp_nge_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NGE_F32) +v_cmp_nlg_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLG_F32) +v_cmp_ngt_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NGT_F32) +v_cmp_nle_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLE_F32) +v_cmp_neq_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NEQ_F32) +v_cmp_nlt_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLT_F32) +v_cmp_lt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_F64) +v_cmp_eq_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_F64) +v_cmp_le_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_F64) +v_cmp_gt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_F64) +v_cmp_lg_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LG_F64) +v_cmp_ge_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_F64) +v_cmp_o_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_O_F64) +v_cmp_u_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_U_F64) +v_cmp_nge_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NGE_F64) +v_cmp_nlg_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLG_F64) +v_cmp_ngt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NGT_F64) +v_cmp_nle_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLE_F64) +v_cmp_neq_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NEQ_F64) +v_cmp_nlt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NLT_F64) +v_cmp_lt_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_I16) +v_cmp_eq_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_I16) +v_cmp_le_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_I16) +v_cmp_gt_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_I16) +v_cmp_ne_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NE_I16) +v_cmp_ge_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_I16) +v_cmp_lt_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_U16) +v_cmp_eq_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_U16) +v_cmp_le_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_U16) +v_cmp_gt_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_U16) +v_cmp_ne_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NE_U16) +v_cmp_ge_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_U16) +v_cmp_lt_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_I32) +v_cmp_eq_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_I32) +v_cmp_le_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_I32) +v_cmp_gt_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_I32) +v_cmp_ne_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NE_I32) +v_cmp_ge_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_I32) +v_cmp_lt_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_U32) +v_cmp_eq_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_U32) +v_cmp_le_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_U32) +v_cmp_gt_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_U32) +v_cmp_ne_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NE_U32) +v_cmp_ge_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_U32) +v_cmp_lt_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_I64) +v_cmp_eq_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_I64) +v_cmp_le_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_I64) 
+v_cmp_gt_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_I64) +v_cmp_ne_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NE_I64) +v_cmp_ge_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_I64) +v_cmp_lt_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LT_U64) +v_cmp_eq_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_EQ_U64) +v_cmp_le_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_LE_U64) +v_cmp_gt_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GT_U64) +v_cmp_ne_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_NE_U64) +v_cmp_ge_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_GE_U64) +v_cmp_class_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMP_CLASS_F16) +v_cmp_class_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMP_CLASS_F32) +v_cmp_class_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMP_CLASS_F64) +v_cmpx_lt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_F16) +v_cmpx_eq_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_F16) +v_cmpx_le_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_F16) +v_cmpx_gt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_F16) +v_cmpx_lg_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LG_F16) +v_cmpx_ge_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_F16) +v_cmpx_o_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_O_F16) +v_cmpx_u_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_U_F16) +v_cmpx_nge_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NGE_F16) +v_cmpx_nlg_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLG_F16) +v_cmpx_ngt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NGT_F16) +v_cmpx_nle_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLE_F16) +v_cmpx_neq_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NEQ_F16) +v_cmpx_nlt_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLT_F16) +v_cmpx_lt_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_F32) +v_cmpx_eq_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_F32) +v_cmpx_le_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_F32) +v_cmpx_gt_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_F32) +v_cmpx_lg_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LG_F32) +v_cmpx_ge_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_F32) +v_cmpx_o_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_O_F32) +v_cmpx_u_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_U_F32) +v_cmpx_nge_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NGE_F32) +v_cmpx_nlg_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLG_F32) +v_cmpx_ngt_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NGT_F32) +v_cmpx_nle_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLE_F32) +v_cmpx_neq_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NEQ_F32) +v_cmpx_nlt_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLT_F32) +v_cmpx_lt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_F64) +v_cmpx_eq_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_F64) +v_cmpx_le_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_F64) +v_cmpx_gt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_F64) +v_cmpx_lg_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LG_F64) +v_cmpx_ge_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_F64) +v_cmpx_o_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_O_F64) +v_cmpx_u_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_U_F64) +v_cmpx_nge_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NGE_F64) +v_cmpx_nlg_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLG_F64) +v_cmpx_ngt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NGT_F64) +v_cmpx_nle_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLE_F64) +v_cmpx_neq_f64_e64 = functools.partial(VOP3, 
VOP3Op.V_CMPX_NEQ_F64) +v_cmpx_nlt_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NLT_F64) +v_cmpx_lt_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_I16) +v_cmpx_eq_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_I16) +v_cmpx_le_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_I16) +v_cmpx_gt_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_I16) +v_cmpx_ne_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NE_I16) +v_cmpx_ge_i16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_I16) +v_cmpx_lt_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_U16) +v_cmpx_eq_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_U16) +v_cmpx_le_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_U16) +v_cmpx_gt_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_U16) +v_cmpx_ne_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NE_U16) +v_cmpx_ge_u16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_U16) +v_cmpx_lt_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_I32) +v_cmpx_eq_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_I32) +v_cmpx_le_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_I32) +v_cmpx_gt_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_I32) +v_cmpx_ne_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NE_I32) +v_cmpx_ge_i32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_I32) +v_cmpx_lt_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_U32) +v_cmpx_eq_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_U32) +v_cmpx_le_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_U32) +v_cmpx_gt_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_U32) +v_cmpx_ne_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NE_U32) +v_cmpx_ge_u32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_U32) +v_cmpx_lt_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_I64) +v_cmpx_eq_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_I64) +v_cmpx_le_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_I64) +v_cmpx_gt_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_I64) +v_cmpx_ne_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NE_I64) +v_cmpx_ge_i64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_I64) +v_cmpx_lt_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LT_U64) +v_cmpx_eq_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_EQ_U64) +v_cmpx_le_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_LE_U64) +v_cmpx_gt_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GT_U64) +v_cmpx_ne_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_NE_U64) +v_cmpx_ge_u64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_GE_U64) +v_cmpx_class_f16_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_CLASS_F16) +v_cmpx_class_f32_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_CLASS_F32) +v_cmpx_class_f64_e64 = functools.partial(VOP3, VOP3Op.V_CMPX_CLASS_F64) +v_cndmask_b32_e64 = functools.partial(VOP3, VOP3Op.V_CNDMASK_B32) +v_add_f64_e64 = functools.partial(VOP3, VOP3Op.V_ADD_F64) +v_add_f32_e64 = functools.partial(VOP3, VOP3Op.V_ADD_F32) +v_sub_f32_e64 = functools.partial(VOP3, VOP3Op.V_SUB_F32) +v_subrev_f32_e64 = functools.partial(VOP3, VOP3Op.V_SUBREV_F32) +v_mul_f64_e64 = functools.partial(VOP3, VOP3Op.V_MUL_F64) +v_mul_dx9_zero_f32_e64 = functools.partial(VOP3, VOP3Op.V_MUL_DX9_ZERO_F32) +v_mul_f32_e64 = functools.partial(VOP3, VOP3Op.V_MUL_F32) +v_mul_i32_i24_e64 = functools.partial(VOP3, VOP3Op.V_MUL_I32_I24) +v_mul_hi_i32_i24_e64 = functools.partial(VOP3, VOP3Op.V_MUL_HI_I32_I24) +v_mul_u32_u24_e64 = functools.partial(VOP3, VOP3Op.V_MUL_U32_U24) +v_mul_hi_u32_u24_e64 = functools.partial(VOP3, VOP3Op.V_MUL_HI_U32_U24) +v_min_num_f64_e64 = functools.partial(VOP3, 
VOP3Op.V_MIN_NUM_F64) +v_max_num_f64_e64 = functools.partial(VOP3, VOP3Op.V_MAX_NUM_F64) +v_min_i32_e64 = functools.partial(VOP3, VOP3Op.V_MIN_I32) +v_max_i32_e64 = functools.partial(VOP3, VOP3Op.V_MAX_I32) +v_min_u32_e64 = functools.partial(VOP3, VOP3Op.V_MIN_U32) +v_max_u32_e64 = functools.partial(VOP3, VOP3Op.V_MAX_U32) +v_min_num_f32_e64 = functools.partial(VOP3, VOP3Op.V_MIN_NUM_F32) +v_max_num_f32_e64 = functools.partial(VOP3, VOP3Op.V_MAX_NUM_F32) +v_lshlrev_b32_e64 = functools.partial(VOP3, VOP3Op.V_LSHLREV_B32) +v_lshrrev_b32_e64 = functools.partial(VOP3, VOP3Op.V_LSHRREV_B32) +v_ashrrev_i32_e64 = functools.partial(VOP3, VOP3Op.V_ASHRREV_I32) +v_and_b32_e64 = functools.partial(VOP3, VOP3Op.V_AND_B32) +v_or_b32_e64 = functools.partial(VOP3, VOP3Op.V_OR_B32) +v_xor_b32_e64 = functools.partial(VOP3, VOP3Op.V_XOR_B32) +v_xnor_b32_e64 = functools.partial(VOP3, VOP3Op.V_XNOR_B32) +v_lshlrev_b64_e64 = functools.partial(VOP3, VOP3Op.V_LSHLREV_B64) +v_add_nc_u32_e64 = functools.partial(VOP3, VOP3Op.V_ADD_NC_U32) +v_sub_nc_u32_e64 = functools.partial(VOP3, VOP3Op.V_SUB_NC_U32) +v_subrev_nc_u32_e64 = functools.partial(VOP3, VOP3Op.V_SUBREV_NC_U32) +v_fmac_f32_e64 = functools.partial(VOP3, VOP3Op.V_FMAC_F32) +v_cvt_pk_rtz_f16_f32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_PK_RTZ_F16_F32) +v_min_num_f16_e64 = functools.partial(VOP3, VOP3Op.V_MIN_NUM_F16) +v_max_num_f16_e64 = functools.partial(VOP3, VOP3Op.V_MAX_NUM_F16) +v_add_f16_e64 = functools.partial(VOP3, VOP3Op.V_ADD_F16) +v_sub_f16_e64 = functools.partial(VOP3, VOP3Op.V_SUB_F16) +v_subrev_f16_e64 = functools.partial(VOP3, VOP3Op.V_SUBREV_F16) +v_mul_f16_e64 = functools.partial(VOP3, VOP3Op.V_MUL_F16) +v_fmac_f16_e64 = functools.partial(VOP3, VOP3Op.V_FMAC_F16) +v_ldexp_f16_e64 = functools.partial(VOP3, VOP3Op.V_LDEXP_F16) +v_nop_e64 = functools.partial(VOP3, VOP3Op.V_NOP) +v_mov_b32_e64 = functools.partial(VOP3, VOP3Op.V_MOV_B32) +v_readfirstlane_b32_e64 = functools.partial(VOP3, VOP3Op.V_READFIRSTLANE_B32) +v_cvt_i32_f64_e64 = functools.partial(VOP3, VOP3Op.V_CVT_I32_F64) +v_cvt_f64_i32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F64_I32) +v_cvt_f32_i32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_I32) +v_cvt_f32_u32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_U32) +v_cvt_u32_f32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_U32_F32) +v_cvt_i32_f32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_I32_F32) +v_cvt_f16_f32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F16_F32) +v_cvt_f32_f16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_F16) +v_cvt_nearest_i32_f32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_NEAREST_I32_F32) +v_cvt_floor_i32_f32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_FLOOR_I32_F32) +v_cvt_off_f32_i4_e64 = functools.partial(VOP3, VOP3Op.V_CVT_OFF_F32_I4) +v_cvt_f32_f64_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_F64) +v_cvt_f64_f32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F64_F32) +v_cvt_f32_ubyte0_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_UBYTE0) +v_cvt_f32_ubyte1_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_UBYTE1) +v_cvt_f32_ubyte2_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_UBYTE2) +v_cvt_f32_ubyte3_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_UBYTE3) +v_cvt_u32_f64_e64 = functools.partial(VOP3, VOP3Op.V_CVT_U32_F64) +v_cvt_f64_u32_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F64_U32) +v_trunc_f64_e64 = functools.partial(VOP3, VOP3Op.V_TRUNC_F64) +v_ceil_f64_e64 = functools.partial(VOP3, VOP3Op.V_CEIL_F64) +v_rndne_f64_e64 = functools.partial(VOP3, VOP3Op.V_RNDNE_F64) +v_floor_f64_e64 = 
functools.partial(VOP3, VOP3Op.V_FLOOR_F64) +v_pipeflush_e64 = functools.partial(VOP3, VOP3Op.V_PIPEFLUSH) +v_mov_b16_e64 = functools.partial(VOP3, VOP3Op.V_MOV_B16) +v_fract_f32_e64 = functools.partial(VOP3, VOP3Op.V_FRACT_F32) +v_trunc_f32_e64 = functools.partial(VOP3, VOP3Op.V_TRUNC_F32) +v_ceil_f32_e64 = functools.partial(VOP3, VOP3Op.V_CEIL_F32) +v_rndne_f32_e64 = functools.partial(VOP3, VOP3Op.V_RNDNE_F32) +v_floor_f32_e64 = functools.partial(VOP3, VOP3Op.V_FLOOR_F32) +v_exp_f32_e64 = functools.partial(VOP3, VOP3Op.V_EXP_F32) +v_log_f32_e64 = functools.partial(VOP3, VOP3Op.V_LOG_F32) +v_rcp_f32_e64 = functools.partial(VOP3, VOP3Op.V_RCP_F32) +v_rcp_iflag_f32_e64 = functools.partial(VOP3, VOP3Op.V_RCP_IFLAG_F32) +v_rsq_f32_e64 = functools.partial(VOP3, VOP3Op.V_RSQ_F32) +v_rcp_f64_e64 = functools.partial(VOP3, VOP3Op.V_RCP_F64) +v_rsq_f64_e64 = functools.partial(VOP3, VOP3Op.V_RSQ_F64) +v_sqrt_f32_e64 = functools.partial(VOP3, VOP3Op.V_SQRT_F32) +v_sqrt_f64_e64 = functools.partial(VOP3, VOP3Op.V_SQRT_F64) +v_sin_f32_e64 = functools.partial(VOP3, VOP3Op.V_SIN_F32) +v_cos_f32_e64 = functools.partial(VOP3, VOP3Op.V_COS_F32) +v_not_b32_e64 = functools.partial(VOP3, VOP3Op.V_NOT_B32) +v_bfrev_b32_e64 = functools.partial(VOP3, VOP3Op.V_BFREV_B32) +v_clz_i32_u32_e64 = functools.partial(VOP3, VOP3Op.V_CLZ_I32_U32) +v_ctz_i32_b32_e64 = functools.partial(VOP3, VOP3Op.V_CTZ_I32_B32) +v_cls_i32_e64 = functools.partial(VOP3, VOP3Op.V_CLS_I32) +v_frexp_exp_i32_f64_e64 = functools.partial(VOP3, VOP3Op.V_FREXP_EXP_I32_F64) +v_frexp_mant_f64_e64 = functools.partial(VOP3, VOP3Op.V_FREXP_MANT_F64) +v_fract_f64_e64 = functools.partial(VOP3, VOP3Op.V_FRACT_F64) +v_frexp_exp_i32_f32_e64 = functools.partial(VOP3, VOP3Op.V_FREXP_EXP_I32_F32) +v_frexp_mant_f32_e64 = functools.partial(VOP3, VOP3Op.V_FREXP_MANT_F32) +v_movreld_b32_e64 = functools.partial(VOP3, VOP3Op.V_MOVRELD_B32) +v_movrels_b32_e64 = functools.partial(VOP3, VOP3Op.V_MOVRELS_B32) +v_movrelsd_b32_e64 = functools.partial(VOP3, VOP3Op.V_MOVRELSD_B32) +v_movrelsd_2_b32_e64 = functools.partial(VOP3, VOP3Op.V_MOVRELSD_2_B32) +v_cvt_f16_u16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F16_U16) +v_cvt_f16_i16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F16_I16) +v_cvt_u16_f16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_U16_F16) +v_cvt_i16_f16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_I16_F16) +v_rcp_f16_e64 = functools.partial(VOP3, VOP3Op.V_RCP_F16) +v_sqrt_f16_e64 = functools.partial(VOP3, VOP3Op.V_SQRT_F16) +v_rsq_f16_e64 = functools.partial(VOP3, VOP3Op.V_RSQ_F16) +v_log_f16_e64 = functools.partial(VOP3, VOP3Op.V_LOG_F16) +v_exp_f16_e64 = functools.partial(VOP3, VOP3Op.V_EXP_F16) +v_frexp_mant_f16_e64 = functools.partial(VOP3, VOP3Op.V_FREXP_MANT_F16) +v_frexp_exp_i16_f16_e64 = functools.partial(VOP3, VOP3Op.V_FREXP_EXP_I16_F16) +v_floor_f16_e64 = functools.partial(VOP3, VOP3Op.V_FLOOR_F16) +v_ceil_f16_e64 = functools.partial(VOP3, VOP3Op.V_CEIL_F16) +v_trunc_f16_e64 = functools.partial(VOP3, VOP3Op.V_TRUNC_F16) +v_rndne_f16_e64 = functools.partial(VOP3, VOP3Op.V_RNDNE_F16) +v_fract_f16_e64 = functools.partial(VOP3, VOP3Op.V_FRACT_F16) +v_sin_f16_e64 = functools.partial(VOP3, VOP3Op.V_SIN_F16) +v_cos_f16_e64 = functools.partial(VOP3, VOP3Op.V_COS_F16) +v_sat_pk_u8_i16_e64 = functools.partial(VOP3, VOP3Op.V_SAT_PK_U8_I16) +v_cvt_norm_i16_f16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_NORM_I16_F16) +v_cvt_norm_u16_f16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_NORM_U16_F16) +v_not_b16_e64 = functools.partial(VOP3, VOP3Op.V_NOT_B16) +v_cvt_i32_i16_e64 = 
functools.partial(VOP3, VOP3Op.V_CVT_I32_I16) +v_cvt_u32_u16_e64 = functools.partial(VOP3, VOP3Op.V_CVT_U32_U16) +v_cvt_f32_fp8_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_FP8) +v_cvt_f32_bf8_e64 = functools.partial(VOP3, VOP3Op.V_CVT_F32_BF8) +v_cvt_pk_f32_fp8_e64 = functools.partial(VOP3, VOP3Op.V_CVT_PK_F32_FP8) +v_cvt_pk_f32_bf8_e64 = functools.partial(VOP3, VOP3Op.V_CVT_PK_F32_BF8) +v_fma_dx9_zero_f32 = functools.partial(VOP3, VOP3Op.V_FMA_DX9_ZERO_F32) +v_mad_i32_i24 = functools.partial(VOP3, VOP3Op.V_MAD_I32_I24) +v_mad_u32_u24 = functools.partial(VOP3, VOP3Op.V_MAD_U32_U24) +v_cubeid_f32 = functools.partial(VOP3, VOP3Op.V_CUBEID_F32) +v_cubesc_f32 = functools.partial(VOP3, VOP3Op.V_CUBESC_F32) +v_cubetc_f32 = functools.partial(VOP3, VOP3Op.V_CUBETC_F32) +v_cubema_f32 = functools.partial(VOP3, VOP3Op.V_CUBEMA_F32) +v_bfe_u32 = functools.partial(VOP3, VOP3Op.V_BFE_U32) +v_bfe_i32 = functools.partial(VOP3, VOP3Op.V_BFE_I32) +v_bfi_b32 = functools.partial(VOP3, VOP3Op.V_BFI_B32) +v_fma_f32 = functools.partial(VOP3, VOP3Op.V_FMA_F32) +v_fma_f64 = functools.partial(VOP3, VOP3Op.V_FMA_F64) +v_lerp_u8 = functools.partial(VOP3, VOP3Op.V_LERP_U8) +v_alignbit_b32 = functools.partial(VOP3, VOP3Op.V_ALIGNBIT_B32) +v_alignbyte_b32 = functools.partial(VOP3, VOP3Op.V_ALIGNBYTE_B32) +v_mullit_f32 = functools.partial(VOP3, VOP3Op.V_MULLIT_F32) +v_min3_i32 = functools.partial(VOP3, VOP3Op.V_MIN3_I32) +v_min3_u32 = functools.partial(VOP3, VOP3Op.V_MIN3_U32) +v_max3_i32 = functools.partial(VOP3, VOP3Op.V_MAX3_I32) +v_max3_u32 = functools.partial(VOP3, VOP3Op.V_MAX3_U32) +v_med3_i32 = functools.partial(VOP3, VOP3Op.V_MED3_I32) +v_med3_u32 = functools.partial(VOP3, VOP3Op.V_MED3_U32) +v_sad_u8 = functools.partial(VOP3, VOP3Op.V_SAD_U8) +v_sad_hi_u8 = functools.partial(VOP3, VOP3Op.V_SAD_HI_U8) +v_sad_u16 = functools.partial(VOP3, VOP3Op.V_SAD_U16) +v_sad_u32 = functools.partial(VOP3, VOP3Op.V_SAD_U32) +v_cvt_pk_u8_f32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_U8_F32) +v_div_fixup_f32 = functools.partial(VOP3, VOP3Op.V_DIV_FIXUP_F32) +v_div_fixup_f64 = functools.partial(VOP3, VOP3Op.V_DIV_FIXUP_F64) +v_min3_num_f32 = functools.partial(VOP3, VOP3Op.V_MIN3_NUM_F32) +v_max3_num_f32 = functools.partial(VOP3, VOP3Op.V_MAX3_NUM_F32) +v_min3_num_f16 = functools.partial(VOP3, VOP3Op.V_MIN3_NUM_F16) +v_max3_num_f16 = functools.partial(VOP3, VOP3Op.V_MAX3_NUM_F16) +v_minimum3_f32 = functools.partial(VOP3, VOP3Op.V_MINIMUM3_F32) +v_maximum3_f32 = functools.partial(VOP3, VOP3Op.V_MAXIMUM3_F32) +v_minimum3_f16 = functools.partial(VOP3, VOP3Op.V_MINIMUM3_F16) +v_maximum3_f16 = functools.partial(VOP3, VOP3Op.V_MAXIMUM3_F16) +v_med3_num_f32 = functools.partial(VOP3, VOP3Op.V_MED3_NUM_F32) +v_med3_num_f16 = functools.partial(VOP3, VOP3Op.V_MED3_NUM_F16) +v_div_fmas_f32 = functools.partial(VOP3, VOP3Op.V_DIV_FMAS_F32) +v_div_fmas_f64 = functools.partial(VOP3, VOP3Op.V_DIV_FMAS_F64) +v_msad_u8 = functools.partial(VOP3, VOP3Op.V_MSAD_U8) +v_qsad_pk_u16_u8 = functools.partial(VOP3, VOP3Op.V_QSAD_PK_U16_U8) +v_mqsad_pk_u16_u8 = functools.partial(VOP3, VOP3Op.V_MQSAD_PK_U16_U8) +v_mqsad_u32_u8 = functools.partial(VOP3, VOP3Op.V_MQSAD_U32_U8) +v_xor3_b32 = functools.partial(VOP3, VOP3Op.V_XOR3_B32) +v_mad_u16 = functools.partial(VOP3, VOP3Op.V_MAD_U16) +v_perm_b32 = functools.partial(VOP3, VOP3Op.V_PERM_B32) +v_xad_u32 = functools.partial(VOP3, VOP3Op.V_XAD_U32) +v_lshl_add_u32 = functools.partial(VOP3, VOP3Op.V_LSHL_ADD_U32) +v_add_lshl_u32 = functools.partial(VOP3, VOP3Op.V_ADD_LSHL_U32) +v_fma_f16 = functools.partial(VOP3, 
VOP3Op.V_FMA_F16) +v_min3_i16 = functools.partial(VOP3, VOP3Op.V_MIN3_I16) +v_min3_u16 = functools.partial(VOP3, VOP3Op.V_MIN3_U16) +v_max3_i16 = functools.partial(VOP3, VOP3Op.V_MAX3_I16) +v_max3_u16 = functools.partial(VOP3, VOP3Op.V_MAX3_U16) +v_med3_i16 = functools.partial(VOP3, VOP3Op.V_MED3_I16) +v_med3_u16 = functools.partial(VOP3, VOP3Op.V_MED3_U16) +v_mad_i16 = functools.partial(VOP3, VOP3Op.V_MAD_I16) +v_div_fixup_f16 = functools.partial(VOP3, VOP3Op.V_DIV_FIXUP_F16) +v_add3_u32 = functools.partial(VOP3, VOP3Op.V_ADD3_U32) +v_lshl_or_b32 = functools.partial(VOP3, VOP3Op.V_LSHL_OR_B32) +v_and_or_b32 = functools.partial(VOP3, VOP3Op.V_AND_OR_B32) +v_or3_b32 = functools.partial(VOP3, VOP3Op.V_OR3_B32) +v_mad_u32_u16 = functools.partial(VOP3, VOP3Op.V_MAD_U32_U16) +v_mad_i32_i16 = functools.partial(VOP3, VOP3Op.V_MAD_I32_I16) +v_permlane16_b32 = functools.partial(VOP3, VOP3Op.V_PERMLANE16_B32) +v_permlanex16_b32 = functools.partial(VOP3, VOP3Op.V_PERMLANEX16_B32) +v_cndmask_b16 = functools.partial(VOP3, VOP3Op.V_CNDMASK_B16) +v_maxmin_u32 = functools.partial(VOP3, VOP3Op.V_MAXMIN_U32) +v_minmax_u32 = functools.partial(VOP3, VOP3Op.V_MINMAX_U32) +v_maxmin_i32 = functools.partial(VOP3, VOP3Op.V_MAXMIN_I32) +v_minmax_i32 = functools.partial(VOP3, VOP3Op.V_MINMAX_I32) +v_dot2_f16_f16 = functools.partial(VOP3, VOP3Op.V_DOT2_F16_F16) +v_dot2_bf16_bf16 = functools.partial(VOP3, VOP3Op.V_DOT2_BF16_BF16) +v_minmax_num_f32 = functools.partial(VOP3, VOP3Op.V_MINMAX_NUM_F32) +v_maxmin_num_f32 = functools.partial(VOP3, VOP3Op.V_MAXMIN_NUM_F32) +v_minmax_num_f16 = functools.partial(VOP3, VOP3Op.V_MINMAX_NUM_F16) +v_maxmin_num_f16 = functools.partial(VOP3, VOP3Op.V_MAXMIN_NUM_F16) +v_minimummaximum_f32 = functools.partial(VOP3, VOP3Op.V_MINIMUMMAXIMUM_F32) +v_maximumminimum_f32 = functools.partial(VOP3, VOP3Op.V_MAXIMUMMINIMUM_F32) +v_minimummaximum_f16 = functools.partial(VOP3, VOP3Op.V_MINIMUMMAXIMUM_F16) +v_maximumminimum_f16 = functools.partial(VOP3, VOP3Op.V_MAXIMUMMINIMUM_F16) +v_s_exp_f32 = functools.partial(VOP3, VOP3Op.V_S_EXP_F32) +v_s_exp_f16 = functools.partial(VOP3, VOP3Op.V_S_EXP_F16) +v_s_log_f32 = functools.partial(VOP3, VOP3Op.V_S_LOG_F32) +v_s_log_f16 = functools.partial(VOP3, VOP3Op.V_S_LOG_F16) +v_s_rcp_f32 = functools.partial(VOP3, VOP3Op.V_S_RCP_F32) +v_s_rcp_f16 = functools.partial(VOP3, VOP3Op.V_S_RCP_F16) +v_s_rsq_f32 = functools.partial(VOP3, VOP3Op.V_S_RSQ_F32) +v_s_rsq_f16 = functools.partial(VOP3, VOP3Op.V_S_RSQ_F16) +v_s_sqrt_f32 = functools.partial(VOP3, VOP3Op.V_S_SQRT_F32) +v_s_sqrt_f16 = functools.partial(VOP3, VOP3Op.V_S_SQRT_F16) +v_add_nc_u16 = functools.partial(VOP3, VOP3Op.V_ADD_NC_U16) +v_sub_nc_u16 = functools.partial(VOP3, VOP3Op.V_SUB_NC_U16) +v_mul_lo_u16 = functools.partial(VOP3, VOP3Op.V_MUL_LO_U16) +v_cvt_pk_i16_f32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_I16_F32) +v_cvt_pk_u16_f32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_U16_F32) +v_max_u16 = functools.partial(VOP3, VOP3Op.V_MAX_U16) +v_max_i16 = functools.partial(VOP3, VOP3Op.V_MAX_I16) +v_min_u16 = functools.partial(VOP3, VOP3Op.V_MIN_U16) +v_min_i16 = functools.partial(VOP3, VOP3Op.V_MIN_I16) +v_add_nc_i16 = functools.partial(VOP3, VOP3Op.V_ADD_NC_I16) +v_sub_nc_i16 = functools.partial(VOP3, VOP3Op.V_SUB_NC_I16) +v_permlane16_var_b32 = functools.partial(VOP3, VOP3Op.V_PERMLANE16_VAR_B32) +v_permlanex16_var_b32 = functools.partial(VOP3, VOP3Op.V_PERMLANEX16_VAR_B32) +v_pack_b32_f16 = functools.partial(VOP3, VOP3Op.V_PACK_B32_F16) +v_cvt_pk_norm_i16_f16 = functools.partial(VOP3, 
VOP3Op.V_CVT_PK_NORM_I16_F16) +v_cvt_pk_norm_u16_f16 = functools.partial(VOP3, VOP3Op.V_CVT_PK_NORM_U16_F16) +v_ldexp_f32 = functools.partial(VOP3, VOP3Op.V_LDEXP_F32) +v_bfm_b32 = functools.partial(VOP3, VOP3Op.V_BFM_B32) +v_bcnt_u32_b32 = functools.partial(VOP3, VOP3Op.V_BCNT_U32_B32) +v_mbcnt_lo_u32_b32 = functools.partial(VOP3, VOP3Op.V_MBCNT_LO_U32_B32) +v_mbcnt_hi_u32_b32 = functools.partial(VOP3, VOP3Op.V_MBCNT_HI_U32_B32) +v_cvt_pk_norm_i16_f32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_NORM_I16_F32) +v_cvt_pk_norm_u16_f32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_NORM_U16_F32) +v_cvt_pk_u16_u32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_U16_U32) +v_cvt_pk_i16_i32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_I16_I32) +v_sub_nc_i32 = functools.partial(VOP3, VOP3Op.V_SUB_NC_I32) +v_add_nc_i32 = functools.partial(VOP3, VOP3Op.V_ADD_NC_I32) +v_ldexp_f64 = functools.partial(VOP3, VOP3Op.V_LDEXP_F64) +v_mul_lo_u32 = functools.partial(VOP3, VOP3Op.V_MUL_LO_U32) +v_mul_hi_u32 = functools.partial(VOP3, VOP3Op.V_MUL_HI_U32) +v_mul_hi_i32 = functools.partial(VOP3, VOP3Op.V_MUL_HI_I32) +v_trig_preop_f64 = functools.partial(VOP3, VOP3Op.V_TRIG_PREOP_F64) +v_lshlrev_b16 = functools.partial(VOP3, VOP3Op.V_LSHLREV_B16) +v_lshrrev_b16 = functools.partial(VOP3, VOP3Op.V_LSHRREV_B16) +v_ashrrev_i16 = functools.partial(VOP3, VOP3Op.V_ASHRREV_I16) +v_lshrrev_b64 = functools.partial(VOP3, VOP3Op.V_LSHRREV_B64) +v_ashrrev_i64 = functools.partial(VOP3, VOP3Op.V_ASHRREV_I64) +v_minimum_f64 = functools.partial(VOP3, VOP3Op.V_MINIMUM_F64) +v_maximum_f64 = functools.partial(VOP3, VOP3Op.V_MAXIMUM_F64) +v_readlane_b32 = functools.partial(VOP3, VOP3Op.V_READLANE_B32) +v_writelane_b32 = functools.partial(VOP3, VOP3Op.V_WRITELANE_B32) +v_and_b16 = functools.partial(VOP3, VOP3Op.V_AND_B16) +v_or_b16 = functools.partial(VOP3, VOP3Op.V_OR_B16) +v_xor_b16 = functools.partial(VOP3, VOP3Op.V_XOR_B16) +v_minimum_f32 = functools.partial(VOP3, VOP3Op.V_MINIMUM_F32) +v_maximum_f32 = functools.partial(VOP3, VOP3Op.V_MAXIMUM_F32) +v_minimum_f16 = functools.partial(VOP3, VOP3Op.V_MINIMUM_F16) +v_maximum_f16 = functools.partial(VOP3, VOP3Op.V_MAXIMUM_F16) +v_cvt_pk_fp8_f32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_FP8_F32) +v_cvt_pk_bf8_f32 = functools.partial(VOP3, VOP3Op.V_CVT_PK_BF8_F32) +v_cvt_sr_fp8_f32 = functools.partial(VOP3, VOP3Op.V_CVT_SR_FP8_F32) +v_cvt_sr_bf8_f32 = functools.partial(VOP3, VOP3Op.V_CVT_SR_BF8_F32) +v_pk_mad_i16 = functools.partial(VOP3P, VOP3POp.V_PK_MAD_I16) +v_pk_mul_lo_u16 = functools.partial(VOP3P, VOP3POp.V_PK_MUL_LO_U16) +v_pk_add_i16 = functools.partial(VOP3P, VOP3POp.V_PK_ADD_I16) +v_pk_sub_i16 = functools.partial(VOP3P, VOP3POp.V_PK_SUB_I16) +v_pk_lshlrev_b16 = functools.partial(VOP3P, VOP3POp.V_PK_LSHLREV_B16) +v_pk_lshrrev_b16 = functools.partial(VOP3P, VOP3POp.V_PK_LSHRREV_B16) +v_pk_ashrrev_i16 = functools.partial(VOP3P, VOP3POp.V_PK_ASHRREV_I16) +v_pk_max_i16 = functools.partial(VOP3P, VOP3POp.V_PK_MAX_I16) +v_pk_min_i16 = functools.partial(VOP3P, VOP3POp.V_PK_MIN_I16) +v_pk_mad_u16 = functools.partial(VOP3P, VOP3POp.V_PK_MAD_U16) +v_pk_add_u16 = functools.partial(VOP3P, VOP3POp.V_PK_ADD_U16) +v_pk_sub_u16 = functools.partial(VOP3P, VOP3POp.V_PK_SUB_U16) +v_pk_max_u16 = functools.partial(VOP3P, VOP3POp.V_PK_MAX_U16) +v_pk_min_u16 = functools.partial(VOP3P, VOP3POp.V_PK_MIN_U16) +v_pk_fma_f16 = functools.partial(VOP3P, VOP3POp.V_PK_FMA_F16) +v_pk_add_f16 = functools.partial(VOP3P, VOP3POp.V_PK_ADD_F16) +v_pk_mul_f16 = functools.partial(VOP3P, VOP3POp.V_PK_MUL_F16) +v_dot2_f32_f16 = 
functools.partial(VOP3P, VOP3POp.V_DOT2_F32_F16) +v_dot4_i32_iu8 = functools.partial(VOP3P, VOP3POp.V_DOT4_I32_IU8) +v_dot4_u32_u8 = functools.partial(VOP3P, VOP3POp.V_DOT4_U32_U8) +v_dot8_i32_iu4 = functools.partial(VOP3P, VOP3POp.V_DOT8_I32_IU4) +v_dot8_u32_u4 = functools.partial(VOP3P, VOP3POp.V_DOT8_U32_U4) +v_dot2_f32_bf16 = functools.partial(VOP3P, VOP3POp.V_DOT2_F32_BF16) +v_pk_min_num_f16 = functools.partial(VOP3P, VOP3POp.V_PK_MIN_NUM_F16) +v_pk_max_num_f16 = functools.partial(VOP3P, VOP3POp.V_PK_MAX_NUM_F16) +v_pk_minimum_f16 = functools.partial(VOP3P, VOP3POp.V_PK_MINIMUM_F16) +v_pk_maximum_f16 = functools.partial(VOP3P, VOP3POp.V_PK_MAXIMUM_F16) +v_fma_mix_f32 = functools.partial(VOP3P, VOP3POp.V_FMA_MIX_F32) +v_fma_mixlo_f16 = functools.partial(VOP3P, VOP3POp.V_FMA_MIXLO_F16) +v_fma_mixhi_f16 = functools.partial(VOP3P, VOP3POp.V_FMA_MIXHI_F16) +v_dot4_f32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_DOT4_F32_FP8_BF8) +v_dot4_f32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_DOT4_F32_BF8_FP8) +v_dot4_f32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_DOT4_F32_FP8_FP8) +v_dot4_f32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_DOT4_F32_BF8_BF8) +v_wmma_f32_16x16x16_f16 = functools.partial(VOP3P, VOP3POp.V_WMMA_F32_16X16X16_F16) +v_wmma_f32_16x16x16_bf16 = functools.partial(VOP3P, VOP3POp.V_WMMA_F32_16X16X16_BF16) +v_wmma_f16_16x16x16_f16 = functools.partial(VOP3P, VOP3POp.V_WMMA_F16_16X16X16_F16) +v_wmma_bf16_16x16x16_bf16 = functools.partial(VOP3P, VOP3POp.V_WMMA_BF16_16X16X16_BF16) +v_wmma_i32_16x16x16_iu8 = functools.partial(VOP3P, VOP3POp.V_WMMA_I32_16X16X16_IU8) +v_wmma_i32_16x16x16_iu4 = functools.partial(VOP3P, VOP3POp.V_WMMA_I32_16X16X16_IU4) +v_wmma_f32_16x16x16_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_WMMA_F32_16X16X16_FP8_FP8) +v_wmma_f32_16x16x16_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_WMMA_F32_16X16X16_FP8_BF8) +v_wmma_f32_16x16x16_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_WMMA_F32_16X16X16_BF8_FP8) +v_wmma_f32_16x16x16_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_WMMA_F32_16X16X16_BF8_BF8) +v_wmma_i32_16x16x32_iu4 = functools.partial(VOP3P, VOP3POp.V_WMMA_I32_16X16X32_IU4) +v_swmmac_f32_16x16x32_f16 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_F16) +v_swmmac_f32_16x16x32_bf16 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_BF16) +v_swmmac_f16_16x16x32_f16 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F16_16X16X32_F16) +v_swmmac_bf16_16x16x32_bf16 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_BF16_16X16X32_BF16) +v_swmmac_i32_16x16x32_iu8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_I32_16X16X32_IU8) +v_swmmac_i32_16x16x32_iu4 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_I32_16X16X32_IU4) +v_swmmac_i32_16x16x64_iu4 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_I32_16X16X64_IU4) +v_swmmac_f32_16x16x32_fp8_fp8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_FP8_FP8) +v_swmmac_f32_16x16x32_fp8_bf8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_FP8_BF8) +v_swmmac_f32_16x16x32_bf8_fp8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_BF8_FP8) +v_swmmac_f32_16x16x32_bf8_bf8 = functools.partial(VOP3P, VOP3POp.V_SWMMAC_F32_16X16X32_BF8_BF8) +dword = functools.partial(VOP3SD, VOP3SDOp.DWORD) +v_add_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_ADD_CO_CI_U32) +v_sub_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUB_CO_CI_U32) +v_subrev_co_ci_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUBREV_CO_CI_U32) +v_div_scale_f32 = functools.partial(VOP3SD, VOP3SDOp.V_DIV_SCALE_F32) +v_div_scale_f64 = functools.partial(VOP3SD, 
VOP3SDOp.V_DIV_SCALE_F64) +v_mad_co_u64_u32 = functools.partial(VOP3SD, VOP3SDOp.V_MAD_CO_U64_U32) +v_mad_co_i64_i32 = functools.partial(VOP3SD, VOP3SDOp.V_MAD_CO_I64_I32) +v_add_co_u32 = functools.partial(VOP3SD, VOP3SDOp.V_ADD_CO_U32) +v_sub_co_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUB_CO_U32) +v_subrev_co_u32 = functools.partial(VOP3SD, VOP3SDOp.V_SUBREV_CO_U32) +v_cmp_lt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_F16) +v_cmp_eq_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_F16) +v_cmp_le_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_F16) +v_cmp_gt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_F16) +v_cmp_lg_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LG_F16) +v_cmp_ge_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_F16) +v_cmp_o_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_O_F16) +v_cmp_u_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_U_F16) +v_cmp_nge_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NGE_F16) +v_cmp_nlg_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLG_F16) +v_cmp_ngt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NGT_F16) +v_cmp_nle_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLE_F16) +v_cmp_neq_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NEQ_F16) +v_cmp_nlt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLT_F16) +v_cmp_lt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_F32) +v_cmp_eq_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_F32) +v_cmp_le_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_F32) +v_cmp_gt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_F32) +v_cmp_lg_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LG_F32) +v_cmp_ge_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_F32) +v_cmp_o_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_O_F32) +v_cmp_u_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_U_F32) +v_cmp_nge_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NGE_F32) +v_cmp_nlg_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLG_F32) +v_cmp_ngt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NGT_F32) +v_cmp_nle_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLE_F32) +v_cmp_neq_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NEQ_F32) +v_cmp_nlt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLT_F32) +v_cmp_lt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_F64) +v_cmp_eq_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_F64) +v_cmp_le_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_F64) +v_cmp_gt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_F64) +v_cmp_lg_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LG_F64) +v_cmp_ge_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_F64) +v_cmp_o_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_O_F64) +v_cmp_u_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_U_F64) +v_cmp_nge_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NGE_F64) +v_cmp_nlg_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLG_F64) +v_cmp_ngt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NGT_F64) +v_cmp_nle_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLE_F64) +v_cmp_neq_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NEQ_F64) +v_cmp_nlt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NLT_F64) +v_cmp_lt_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_I16) +v_cmp_eq_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_I16) +v_cmp_le_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_I16) +v_cmp_gt_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_I16) +v_cmp_ne_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NE_I16) +v_cmp_ge_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_I16) +v_cmp_lt_u16_e32 = 
functools.partial(VOPC, VOPCOp.V_CMP_LT_U16) +v_cmp_eq_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_U16) +v_cmp_le_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_U16) +v_cmp_gt_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_U16) +v_cmp_ne_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NE_U16) +v_cmp_ge_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_U16) +v_cmp_lt_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_I32) +v_cmp_eq_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_I32) +v_cmp_le_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_I32) +v_cmp_gt_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_I32) +v_cmp_ne_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NE_I32) +v_cmp_ge_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_I32) +v_cmp_lt_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_U32) +v_cmp_eq_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_U32) +v_cmp_le_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_U32) +v_cmp_gt_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_U32) +v_cmp_ne_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NE_U32) +v_cmp_ge_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_U32) +v_cmp_lt_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_I64) +v_cmp_eq_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_I64) +v_cmp_le_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_I64) +v_cmp_gt_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_I64) +v_cmp_ne_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NE_I64) +v_cmp_ge_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_I64) +v_cmp_lt_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LT_U64) +v_cmp_eq_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_EQ_U64) +v_cmp_le_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_LE_U64) +v_cmp_gt_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GT_U64) +v_cmp_ne_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_NE_U64) +v_cmp_ge_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_GE_U64) +v_cmp_class_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMP_CLASS_F16) +v_cmp_class_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMP_CLASS_F32) +v_cmp_class_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMP_CLASS_F64) +v_cmpx_lt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_F16) +v_cmpx_eq_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_F16) +v_cmpx_le_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_F16) +v_cmpx_gt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_F16) +v_cmpx_lg_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LG_F16) +v_cmpx_ge_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_F16) +v_cmpx_o_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_O_F16) +v_cmpx_u_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_U_F16) +v_cmpx_nge_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NGE_F16) +v_cmpx_nlg_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLG_F16) +v_cmpx_ngt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NGT_F16) +v_cmpx_nle_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLE_F16) +v_cmpx_neq_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NEQ_F16) +v_cmpx_nlt_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLT_F16) +v_cmpx_lt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_F32) +v_cmpx_eq_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_F32) +v_cmpx_le_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_F32) +v_cmpx_gt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_F32) +v_cmpx_lg_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LG_F32) +v_cmpx_ge_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_F32) +v_cmpx_o_f32_e32 = functools.partial(VOPC, 
VOPCOp.V_CMPX_O_F32) +v_cmpx_u_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_U_F32) +v_cmpx_nge_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NGE_F32) +v_cmpx_nlg_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLG_F32) +v_cmpx_ngt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NGT_F32) +v_cmpx_nle_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLE_F32) +v_cmpx_neq_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NEQ_F32) +v_cmpx_nlt_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLT_F32) +v_cmpx_lt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_F64) +v_cmpx_eq_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_F64) +v_cmpx_le_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_F64) +v_cmpx_gt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_F64) +v_cmpx_lg_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LG_F64) +v_cmpx_ge_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_F64) +v_cmpx_o_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_O_F64) +v_cmpx_u_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_U_F64) +v_cmpx_nge_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NGE_F64) +v_cmpx_nlg_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLG_F64) +v_cmpx_ngt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NGT_F64) +v_cmpx_nle_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLE_F64) +v_cmpx_neq_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NEQ_F64) +v_cmpx_nlt_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NLT_F64) +v_cmpx_lt_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_I16) +v_cmpx_eq_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_I16) +v_cmpx_le_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_I16) +v_cmpx_gt_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_I16) +v_cmpx_ne_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_I16) +v_cmpx_ge_i16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_I16) +v_cmpx_lt_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_U16) +v_cmpx_eq_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_U16) +v_cmpx_le_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_U16) +v_cmpx_gt_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_U16) +v_cmpx_ne_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_U16) +v_cmpx_ge_u16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_U16) +v_cmpx_lt_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_I32) +v_cmpx_eq_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_I32) +v_cmpx_le_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_I32) +v_cmpx_gt_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_I32) +v_cmpx_ne_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_I32) +v_cmpx_ge_i32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_I32) +v_cmpx_lt_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_U32) +v_cmpx_eq_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_U32) +v_cmpx_le_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_U32) +v_cmpx_gt_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_U32) +v_cmpx_ne_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_U32) +v_cmpx_ge_u32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_U32) +v_cmpx_lt_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_I64) +v_cmpx_eq_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_EQ_I64) +v_cmpx_le_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_I64) +v_cmpx_gt_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_I64) +v_cmpx_ne_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_I64) +v_cmpx_ge_i64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_I64) +v_cmpx_lt_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LT_U64) +v_cmpx_eq_u64_e32 = 
functools.partial(VOPC, VOPCOp.V_CMPX_EQ_U64)
+v_cmpx_le_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_LE_U64)
+v_cmpx_gt_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GT_U64)
+v_cmpx_ne_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_NE_U64)
+v_cmpx_ge_u64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_GE_U64)
+v_cmpx_class_f16_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_CLASS_F16)
+v_cmpx_class_f32_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_CLASS_F32)
+v_cmpx_class_f64_e32 = functools.partial(VOPC, VOPCOp.V_CMPX_CLASS_F64)
+v_dual_fmac_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_FMAC_F32)
+v_dual_fmaak_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_FMAAK_F32)
+v_dual_fmamk_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_FMAMK_F32)
+v_dual_mul_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_MUL_F32)
+v_dual_add_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_ADD_F32)
+v_dual_sub_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_SUB_F32)
+v_dual_subrev_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_SUBREV_F32)
+v_dual_mul_dx9_zero_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_MUL_DX9_ZERO_F32)
+v_dual_mov_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_MOV_B32)
+v_dual_cndmask_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_CNDMASK_B32)
+v_dual_max_num_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_MAX_NUM_F32)
+v_dual_min_num_f32 = functools.partial(VOPD, VOPDOp.V_DUAL_MIN_NUM_F32)
+v_dual_dot2acc_f32_f16 = functools.partial(VOPD, VOPDOp.V_DUAL_DOT2ACC_F32_F16)
+v_dual_dot2acc_f32_bf16 = functools.partial(VOPD, VOPDOp.V_DUAL_DOT2ACC_F32_BF16)
+v_dual_add_nc_u32 = functools.partial(VOPD, VOPDOp.V_DUAL_ADD_NC_U32)
+v_dual_lshlrev_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_LSHLREV_B32)
+v_dual_and_b32 = functools.partial(VOPD, VOPDOp.V_DUAL_AND_B32)
+
+VCC_LO = SrcEnum.VCC_LO
+VCC_HI = SrcEnum.VCC_HI
+NULL = SrcEnum.NULL
+M0 = SrcEnum.M0
+EXEC_LO = SrcEnum.EXEC_LO
+EXEC_HI = SrcEnum.EXEC_HI
+ZERO = SrcEnum.ZERO
+DPP8FI = SrcEnum.DPP8FI
+SHARED_BASE = SrcEnum.SHARED_BASE
+SHARED_LIMIT = SrcEnum.SHARED_LIMIT
+PRIVATE_BASE = SrcEnum.PRIVATE_BASE
+PRIVATE_LIMIT = SrcEnum.PRIVATE_LIMIT
+POS_HALF = SrcEnum.POS_HALF
+NEG_HALF = SrcEnum.NEG_HALF
+POS_ONE = SrcEnum.POS_ONE
+NEG_ONE = SrcEnum.NEG_ONE
+POS_TWO = SrcEnum.POS_TWO
+NEG_TWO = SrcEnum.NEG_TWO
+POS_FOUR = SrcEnum.POS_FOUR
+NEG_FOUR = SrcEnum.NEG_FOUR
+INV_2PI = SrcEnum.INV_2PI
+VCCZ = SrcEnum.VCCZ
+EXECZ = SrcEnum.EXECZ
+SCC = SrcEnum.SCC
+LDS_DIRECT = SrcEnum.LDS_DIRECT
+OFF = NULL
diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py
new file mode 100644
index 0000000000..a2ada65557
--- /dev/null
+++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py
@@ -0,0 +1,13053 @@
+# autogenerated by pcode.py - do not edit
+# to regenerate: python -m extra.assembly.amd.pcode --arch rdna4
+# ruff: noqa: E501,F405,F403
+# mypy: ignore-errors
+from extra.assembly.amd.autogen.rdna4 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
+from extra.assembly.amd.pcode import *
+
+def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.b32 = S0.b32
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.b32 = S0.b32
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.b64 = S0.b64
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.b64 = S0.b64
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  return result
+
+def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # if SCC then
+  # D0.b32 = S0.b32
+  # endif
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  SCC = Reg(scc)
+  # --- compiled pseudocode ---
+  if SCC:
+    D0.b32 = S0.b32
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': SCC._val & 1}
+  return result
+
+def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # if SCC then
+  # D0.b64 = S0.b64
+  # endif
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  SCC = Reg(scc)
+  # --- compiled pseudocode ---
+  if SCC:
+    D0.b64 = S0.b64
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': SCC._val & 1}
+  result['d0_64'] = True
+  return result
+
+def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.u32[31 : 0] = S0.u32[0 : 31]
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.u32[31 : 0] = S0.u32[0 : 31]
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.u64[63 : 0] = S0.u64[0 : 63]
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.u64[63 : 0] = S0.u64[0 : 63]
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  return result
+
+def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # tmp = -1;
+  # // Set if no ones are found
+  # for i in 0 : 31 do
+  # // Search from LSB
+  # if S0.u32[i] == 1'1U then
+  # tmp = i;
+  # endif
+  # endfor;
+  # D0.i32 = tmp
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  tmp = Reg(0)
+  # --- compiled pseudocode ---
+  tmp = Reg(-1)
+  for i in range(0, int(31)+1):
+    if S0.u32[i] == 1:
+      tmp = Reg(i); break
+  D0.i32 = tmp
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # tmp = -1;
+  # // Set if no ones are found
+  # for i in 0 : 63 do
+  # // Search from LSB
+  # if S0.u64[i] == 1'1U then
+  # tmp = i;
+  # endif
+  # endfor;
+  # D0.i32 = tmp
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  tmp = Reg(0)
+  # --- compiled pseudocode ---
+  tmp = Reg(-1)
+  for i in range(0, int(63)+1):
+    if S0.u64[i] == 1:
+      tmp = Reg(i); break
+  D0.i32 = tmp
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # tmp = -1;
+  # // Set if no ones are found
+  # for i in 0 : 31 do
+  # // Search from MSB
+  # if S0.u32[31 - i] == 1'1U then
+  # tmp = i;
+  # endif
+  # endfor;
+  # D0.i32 = tmp
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  tmp = Reg(0)
+  # --- compiled pseudocode ---
+  tmp = Reg(-1)
+  for i in range(0, int(31)+1):
+    if S0.u32[31 - i] == 1:
+      tmp = Reg(i); break
+  D0.i32 = tmp
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # tmp = -1;
+  # // Set if no ones are found
+  # for i in 0 : 63 do
+  # // Search from MSB
+  # if S0.u64[63 - i] == 1'1U then
+  # tmp = i;
+  # endif
+  # endfor;
+  # D0.i32 = tmp
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  tmp = Reg(0)
+  # --- compiled pseudocode ---
+  tmp = Reg(-1)
+  for i in range(0, int(63)+1):
+    if S0.u64[63 - i] == 1:
+      tmp = Reg(i); break
+  D0.i32 = tmp
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # tmp = -1;
+  # // Set if all bits are the same
+  # for i in 1 : 31 do
+  # // Search from MSB
+  # if S0.u32[31 - i] != S0.u32[31] then
+  # tmp = i;
+  # endif
+  # endfor;
+  # D0.i32 = tmp
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  tmp = Reg(0)
+  # --- compiled pseudocode ---
+  tmp = Reg(-1)
+  for i in range(1, int(31)+1):
+    if S0.u32[31 - i] != S0.u32[31]:
+      tmp = Reg(i)
+  D0.i32 = tmp
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # tmp = -1;
+  # // Set if all bits are the same
+  # for i in 1 : 63 do
+  # // Search from MSB
+  # if S0.u64[63 - i] != S0.u64[63] then
+  # tmp = i;
+  # endif
+  # endfor;
+  # D0.i32 = tmp
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  tmp = Reg(0)
+  # --- compiled pseudocode ---
+  tmp = Reg(-1)
+  for i in range(1, int(63)+1):
+    if S0.u64[63 - i] != S0.u64[63]:
+      tmp = Reg(i)
+  D0.i32 = tmp
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.i32 = 32'I(signext(S0.i8))
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.i32 = (signext(S0.i8))
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.i32 = 32'I(signext(S0.i16))
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.i32 = (signext(S0.i16))
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.u32[S0.u32[4 : 0]] = 1'0U
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.u32[S0.u32[4 : 0]] = 0
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.u64[S0.u32[5 : 0]] = 1'0U
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.u64[S0.u32[5 : 0]] = 0
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  return result
+
+def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.u32[S0.u32[4 : 0]] = 1'1U
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.u32[S0.u32[4 : 0]] = 1
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+  # D0.u64[S0.u32[5 : 0]] = 1'1U
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.u64[S0.u32[5 : 0]] = 1
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  return result
+
+def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): + # tmp = S0.u32; + # for i in 0 : 31 do + # D0.u64[i * 2] = tmp[i]; + # D0.u64[i * 2 + 1] = tmp[i] + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.u32) + for i in range(0, int(31)+1): + D0.u64[i * 2] = tmp[i] + D0.u64[i * 2 + 1] = tmp[i] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; + # SCC = D0.i32 != 0 + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 31 do + # tmp += S0.u32[i] == 1'0U ? 1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(31)+1): + tmp += ((1) if (S0.u32[i] == 0) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 63 do + # tmp += S0.u64[i] == 1'0U ? 1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(63)+1): + tmp += ((1) if (S0.u64[i] == 0) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 31 do + # tmp += S0.u32[i] == 1'1U ? 1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(31)+1): + tmp += ((1) if (S0.u32[i] == 1) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 0; + # for i in 0 : 63 do + # tmp += S0.u64[i] == 1'1U ? 
1 : 0 + # endfor; + # D0.i32 = tmp; + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(0) + for i in range(0, int(63)+1): + tmp += ((1) if (S0.u64[i] == 1) else (0)) + D0.i32 = tmp + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~S0.u32; + # SCC = D0.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~S0.u32 + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~S0.u64; + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~S0.u64 + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u32; + # EXEC.u32 = (S0.u32 & EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = (S0.u32 & EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 & EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 & EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set + # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination + # saveexec = EXEC.u32; + # EXEC.u32 = (S0.u32 | EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = (S0.u32 
| EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set + # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 | EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 | EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u32; + # EXEC.u32 = (S0.u32 ^ EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = (S0.u32 ^ EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 ^ EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 ^ EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u32; + # EXEC.u32 = ~(S0.u32 & EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = ~(S0.u32 & EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode 
--- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = ~(S0.u64 & EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = ~(S0.u64 & EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u32; + # EXEC.u32 = ~(S0.u32 | EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = ~(S0.u32 | EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = ~(S0.u64 | EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = ~(S0.u64 | EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u32; + # EXEC.u32 = ~(S0.u32 ^ EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = ~(S0.u32 ^ EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: 
result['exec'] = EXEC._val + return result + +def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, + # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar + # saveexec = EXEC.u64; + # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = ~(S0.u64 ^ EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into + # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into + # saveexec = EXEC.u32; + # EXEC.u32 = (~S0.u32 & EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = (~S0.u32 & EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into + # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into + # saveexec = EXEC.u64; + # EXEC.u64 = (~S0.u64 & EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (~S0.u64 & EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the + # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the + # saveexec = EXEC.u32; + # EXEC.u32 = (~S0.u32 | EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = (~S0.u32 | EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def 
_SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the + # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the + # saveexec = EXEC.u64; + # EXEC.u64 = (~S0.u64 | EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (~S0.u64 | EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into + # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into + # saveexec = EXEC.u32; + # EXEC.u32 = (S0.u32 & ~EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = (S0.u32 & ~EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into + # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 & ~EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 & ~EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the + # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the + # saveexec = EXEC.u32; + # EXEC.u32 = (S0.u32 | ~EXEC.u32); + # D0.u32 = saveexec.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u32) + EXEC.u32 = (S0.u32 | ~EXEC.u32) + D0.u32 = saveexec.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the + # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the + # saveexec = EXEC.u64; + # EXEC.u64 = (S0.u64 | ~EXEC.u64); + # D0.u64 = saveexec.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + saveexec = Reg(exec_mask) + # --- compiled pseudocode --- + saveexec = Reg(EXEC.u64) + EXEC.u64 = (S0.u64 | ~EXEC.u64) + D0.u64 = saveexec.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into + # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op + # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is + # EXEC.u32 = (~S0.u32 & EXEC.u32); + # D0.u32 = EXEC.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + EXEC.u32 = (~S0.u32 & EXEC.u32) + D0.u32 = EXEC.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into + # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op + # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is + # EXEC.u64 = (~S0.u64 & EXEC.u64); + # D0.u64 = EXEC.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + EXEC.u64 = (~S0.u64 & EXEC.u64) + D0.u64 = EXEC.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into + # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op + # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is + # EXEC.u32 = (S0.u32 & ~EXEC.u32); + # D0.u32 = EXEC.u32; + # SCC = EXEC.u32 != 0U + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + EXEC.u32 = (S0.u32 & ~EXEC.u32) + D0.u32 = EXEC.u32 + SCC = Reg(EXEC.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into + # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op + # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is + # EXEC.u64 = (S0.u64 & ~EXEC.u64); + # D0.u64 = EXEC.u64; + # SCC = EXEC.u64 != 0ULL + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + EXEC.u64 = (S0.u64 & ~EXEC.u64) + D0.u64 = EXEC.u64 + SCC = Reg(EXEC.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['d0_64'] = True + return result + +def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # If SDST is VCC then VCCZ is undefined. + VCC = Reg(vcc) + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # If SDST is VCC then VCCZ is undefined. + VCC = Reg(vcc) + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if !InWorkgroup() then + # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then + # elsif barrierNumber == 0 then + # else + # BARRIER_STATE[barrierNumber & 63].signalCnt += 7'1U + # endif; + # --- compiled pseudocode --- + if not InWorkgroup(): + pass + elif ((barrierNumber == -2) and not WAVE_STATUS.PRIV): + pass + elif barrierNumber == 0: + pass + else: + BARRIER_STATE[barrierNumber & 63].signalCnt += 1 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if !InWorkgroup() then + # SCC = 1'0U + # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then + # SCC = 1'0U + # elsif barrierNumber == 0 then + # SCC = 1'0U + # else + # // Set SCC if this is the first signaling event for this barrier. 
+ # SCC = BARRIER_STATE[barrierNumber & 63].signalCnt.u32 == 0U; + # BARRIER_STATE[barrierNumber & 63].signalCnt += 7'1U + # endif; + SCC = Reg(scc) + # --- compiled pseudocode --- + if not InWorkgroup(): + SCC = Reg(0) + elif ((barrierNumber == -2) and not WAVE_STATUS.PRIV): + SCC = Reg(0) + elif barrierNumber == 0: + SCC = Reg(0) + else: + SCC = Reg(BARRIER_STATE[barrierNumber & 63].signalCnt.u32 == 0) + BARRIER_STATE[barrierNumber & 63].signalCnt += 1 + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U({ 9'0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 5'0, BARRIER_STATE[barrierNumber & + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ({ 0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 0, BARRIER_STATE[barrierNumber] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # n = ReallocVgprs(32'I(S0[8 : 0].u32)); + # if n < 0 then + # SCC = 1'0U + # else + # NUM_VGPRS = n; + # SCC = 1'1U + # endif + S0 = Reg(s0) + SCC = Reg(scc) + # --- compiled pseudocode --- + n = ReallocVgprs((S0[8 : 0].u32)) + if n < 0: + SCC = Reg(0) + else: + NUM_VGPRS = n + SCC = Reg(1) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # S0[6:0] determines the sleep duration. The wave sleeps for (64*(S0[6:0]-1) … 64*S0[6:0]) clocks. The exact + S0 = Reg(s0) + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): + D0.f32 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += -1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): + D0.f32 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = floor(S0.f32 + 0.5F); + # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then + # D0.f32 -= 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = floor(S0.f32 + 0.5) + if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): + D0.f32 -= 1.0 + # --- end pseudocode --- + 
result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = i32_to_f32(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = i32_to_f32(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f32_to_u32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f32_to_u32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = f32_to_f16(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = f32_to_f16(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f16_to_f32(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f16_to_f32(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f16_to_f32(S0[31 : 16].f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f16_to_f32(S0[31 : 16].f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): + D0.f16 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += -16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): + D0.f16 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + # --- end 
pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = floor(S0.f16 + 16'0.5); + # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then + # D0.f16 -= 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = floor(S0.f16 + 0.5) + if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): + D0.f16 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +SOP1Op_FUNCTIONS = { + SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, + SOP1Op.S_MOV_B64: _SOP1Op_S_MOV_B64, + SOP1Op.S_CMOV_B32: _SOP1Op_S_CMOV_B32, + SOP1Op.S_CMOV_B64: _SOP1Op_S_CMOV_B64, + SOP1Op.S_BREV_B32: _SOP1Op_S_BREV_B32, + SOP1Op.S_BREV_B64: _SOP1Op_S_BREV_B64, + SOP1Op.S_CTZ_I32_B32: _SOP1Op_S_CTZ_I32_B32, + SOP1Op.S_CTZ_I32_B64: _SOP1Op_S_CTZ_I32_B64, + SOP1Op.S_CLZ_I32_U32: _SOP1Op_S_CLZ_I32_U32, + SOP1Op.S_CLZ_I32_U64: _SOP1Op_S_CLZ_I32_U64, + SOP1Op.S_CLS_I32: _SOP1Op_S_CLS_I32, + SOP1Op.S_CLS_I32_I64: _SOP1Op_S_CLS_I32_I64, + SOP1Op.S_SEXT_I32_I8: _SOP1Op_S_SEXT_I32_I8, + SOP1Op.S_SEXT_I32_I16: _SOP1Op_S_SEXT_I32_I16, + SOP1Op.S_BITSET0_B32: _SOP1Op_S_BITSET0_B32, + SOP1Op.S_BITSET0_B64: _SOP1Op_S_BITSET0_B64, + SOP1Op.S_BITSET1_B32: _SOP1Op_S_BITSET1_B32, + SOP1Op.S_BITSET1_B64: _SOP1Op_S_BITSET1_B64, + SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32, + SOP1Op.S_ABS_I32: _SOP1Op_S_ABS_I32, + SOP1Op.S_BCNT0_I32_B32: _SOP1Op_S_BCNT0_I32_B32, + SOP1Op.S_BCNT0_I32_B64: _SOP1Op_S_BCNT0_I32_B64, + SOP1Op.S_BCNT1_I32_B32: _SOP1Op_S_BCNT1_I32_B32, + SOP1Op.S_BCNT1_I32_B64: _SOP1Op_S_BCNT1_I32_B64, + SOP1Op.S_NOT_B32: _SOP1Op_S_NOT_B32, + SOP1Op.S_NOT_B64: _SOP1Op_S_NOT_B64, + SOP1Op.S_AND_SAVEEXEC_B32: _SOP1Op_S_AND_SAVEEXEC_B32, + SOP1Op.S_AND_SAVEEXEC_B64: _SOP1Op_S_AND_SAVEEXEC_B64, + SOP1Op.S_OR_SAVEEXEC_B32: _SOP1Op_S_OR_SAVEEXEC_B32, + SOP1Op.S_OR_SAVEEXEC_B64: _SOP1Op_S_OR_SAVEEXEC_B64, + SOP1Op.S_XOR_SAVEEXEC_B32: _SOP1Op_S_XOR_SAVEEXEC_B32, + SOP1Op.S_XOR_SAVEEXEC_B64: _SOP1Op_S_XOR_SAVEEXEC_B64, + SOP1Op.S_NAND_SAVEEXEC_B32: _SOP1Op_S_NAND_SAVEEXEC_B32, + SOP1Op.S_NAND_SAVEEXEC_B64: _SOP1Op_S_NAND_SAVEEXEC_B64, + SOP1Op.S_NOR_SAVEEXEC_B32: _SOP1Op_S_NOR_SAVEEXEC_B32, + SOP1Op.S_NOR_SAVEEXEC_B64: _SOP1Op_S_NOR_SAVEEXEC_B64, + SOP1Op.S_XNOR_SAVEEXEC_B32: _SOP1Op_S_XNOR_SAVEEXEC_B32, + SOP1Op.S_XNOR_SAVEEXEC_B64: _SOP1Op_S_XNOR_SAVEEXEC_B64, + SOP1Op.S_AND_NOT0_SAVEEXEC_B32: _SOP1Op_S_AND_NOT0_SAVEEXEC_B32, + SOP1Op.S_AND_NOT0_SAVEEXEC_B64: _SOP1Op_S_AND_NOT0_SAVEEXEC_B64, + SOP1Op.S_OR_NOT0_SAVEEXEC_B32: _SOP1Op_S_OR_NOT0_SAVEEXEC_B32, + SOP1Op.S_OR_NOT0_SAVEEXEC_B64: _SOP1Op_S_OR_NOT0_SAVEEXEC_B64, + SOP1Op.S_AND_NOT1_SAVEEXEC_B32: _SOP1Op_S_AND_NOT1_SAVEEXEC_B32, + SOP1Op.S_AND_NOT1_SAVEEXEC_B64: _SOP1Op_S_AND_NOT1_SAVEEXEC_B64, + SOP1Op.S_OR_NOT1_SAVEEXEC_B32: _SOP1Op_S_OR_NOT1_SAVEEXEC_B32, + SOP1Op.S_OR_NOT1_SAVEEXEC_B64: _SOP1Op_S_OR_NOT1_SAVEEXEC_B64, + SOP1Op.S_AND_NOT0_WREXEC_B32: _SOP1Op_S_AND_NOT0_WREXEC_B32, + SOP1Op.S_AND_NOT0_WREXEC_B64: _SOP1Op_S_AND_NOT0_WREXEC_B64, + SOP1Op.S_AND_NOT1_WREXEC_B32: _SOP1Op_S_AND_NOT1_WREXEC_B32, + SOP1Op.S_AND_NOT1_WREXEC_B64: _SOP1Op_S_AND_NOT1_WREXEC_B64, + SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32, + SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64, + SOP1Op.S_BARRIER_SIGNAL: _SOP1Op_S_BARRIER_SIGNAL, + SOP1Op.S_BARRIER_SIGNAL_ISFIRST: _SOP1Op_S_BARRIER_SIGNAL_ISFIRST, + SOP1Op.S_GET_BARRIER_STATE: _SOP1Op_S_GET_BARRIER_STATE, 
+ SOP1Op.S_ALLOC_VGPR: _SOP1Op_S_ALLOC_VGPR, + SOP1Op.S_SLEEP_VAR: _SOP1Op_S_SLEEP_VAR, + SOP1Op.S_CEIL_F32: _SOP1Op_S_CEIL_F32, + SOP1Op.S_FLOOR_F32: _SOP1Op_S_FLOOR_F32, + SOP1Op.S_TRUNC_F32: _SOP1Op_S_TRUNC_F32, + SOP1Op.S_RNDNE_F32: _SOP1Op_S_RNDNE_F32, + SOP1Op.S_CVT_F32_I32: _SOP1Op_S_CVT_F32_I32, + SOP1Op.S_CVT_F32_U32: _SOP1Op_S_CVT_F32_U32, + SOP1Op.S_CVT_I32_F32: _SOP1Op_S_CVT_I32_F32, + SOP1Op.S_CVT_U32_F32: _SOP1Op_S_CVT_U32_F32, + SOP1Op.S_CVT_F16_F32: _SOP1Op_S_CVT_F16_F32, + SOP1Op.S_CVT_F32_F16: _SOP1Op_S_CVT_F32_F16, + SOP1Op.S_CVT_HI_F32_F16: _SOP1Op_S_CVT_HI_F32_F16, + SOP1Op.S_CEIL_F16: _SOP1Op_S_CEIL_F16, + SOP1Op.S_FLOOR_F16: _SOP1Op_S_FLOOR_F16, + SOP1Op.S_TRUNC_F16: _SOP1Op_S_TRUNC_F16, + SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, +} + +def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32; + # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32) + SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.i32 + S1.i32; + # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); + # D0.i32 = tmp.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.i32 + S1.i32) + SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) + D0.i32 = tmp.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.i32 - S1.i32; + # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); + # D0.i32 = tmp.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.i32 - S1.i32) + SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) + D0.i32 = tmp.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; + # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32 - SCC.u32; + # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32 - SCC.u32) + SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 - S1.i32; + # if D0.i32 < 0 then + # D0.i32 = -D0.i32 + # endif; + # SCC = D0.i32 != 0 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i32 = S0.i32 - S1.i32 + if D0.i32 < 0: + D0.i32 = -D0.i32 + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 << S1[4 : 0].u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 << S1[4 : 0].u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 << S1[5 : 0].u32); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 << S1[5 : 0].u32) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 >> S1[4 : 0].u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 >> S1[4 : 0].u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 >> S1[5 : 0].u32); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 >> S1[5 : 0].u32) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); + # SCC = D0.i32 != 0 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + 
result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); + # SCC = D0.i64 != 0LL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) + SCC = Reg(D0.i64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 1) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 2) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 3) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); + # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32) << 4) + (S1.u32)) + SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 < S1.i32; + # D0.i32 = SCC ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 < S1.i32) + D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 < S1.u32; + # D0.u32 = SCC ? 
S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 < S1.u32) + D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 >= S1.i32; + # D0.i32 = SCC ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 >= S1.i32) + D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 >= S1.u32; + # D0.u32 = SCC ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 >= S1.u32) + D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 & S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 & S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 | S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 | S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 ^ S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 ^ S1.u64) + SCC = 
Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 & S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 & S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~(S0.u64 & S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~(S0.u64 & S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 | S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 | S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~(S0.u64 | S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~(S0.u64 | S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 ^ S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 ^ S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ~(S0.u64 ^ S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ~(S0.u64 ^ S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & ~S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & ~S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 & ~S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 & ~S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def 
_SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | ~S1.u32); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | ~S1.u32) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S0.u64 | ~S1.u64); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = (S0.u64 | ~S1.u64) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); + # SCC = D0.u32 != 0U + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + SCC = Reg(D0.u32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); + # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); + # SCC = D0.i32 != 0 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) + SCC = Reg(D0.i32 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); + # SCC = D0.u64 != 0ULL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + SCC = Reg(D0.u64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); + # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); + # SCC = D0.i64 != 0LL + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) + D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) + SCC = Reg(D0.i64 != 0) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 * S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = S0.i32 * S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u32) * (S1.u32)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (((S0.i32) * (S1.i32)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = SCC ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = SCC ? 
S0.u64 : S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(S1[15 : 0].u16, S0[31 : 16].u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 - S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 - S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && + # !sign(S1.f32))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((S0.f32 < S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and sign(S0.f32) 
and not sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && + # sign(S1.f32))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((S0.f32 > S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and not sign(S0.f32) and sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 * S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, D0.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, D0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # prev_mode = ROUND_MODE; + # tmp[15 : 0].f16 = f32_to_f16(S0.f32); + # tmp[31 : 16].f16 = f32_to_f16(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + prev_mode = ROUND_MODE + tmp[15 : 0].f16 = f32_to_f16(S0.f32) + tmp[31 : 16].f16 = f32_to_f16(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = 
Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 - S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 - S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && + # !sign(S1.f16))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((S0.f16 < S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and sign(S0.f16) and not sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && + # sign(S1.f16))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((S0.f16 > S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and not sign(S0.f16) and sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = fma(S0.f16, S1.f16, D0.f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 
fma(S0.f16, S1.f16, D0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isSignalNAN(64'F(S1.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isQuietNAN(64'F(S0.f32)) then + # D0.f32 = S0.f32 + # elsif isQuietNAN(64'F(S1.f32)) then + # D0.f32 = S1.f32 + # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && + # !sign(S1.f32))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f32)): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isSignalNAN(F(S1.f32)): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isQuietNAN(F(S0.f32)): + D0.f32 = S0.f32 + elif isQuietNAN(F(S1.f32)): + D0.f32 = S1.f32 + elif ((S0.f32 < S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and sign(S0.f32) and not sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isSignalNAN(64'F(S1.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isQuietNAN(64'F(S0.f32)) then + # D0.f32 = S0.f32 + # elsif isQuietNAN(64'F(S1.f32)) then + # D0.f32 = S1.f32 + # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && + # sign(S1.f32))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f32)): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isSignalNAN(F(S1.f32)): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isQuietNAN(F(S0.f32)): + D0.f32 = S0.f32 + elif isQuietNAN(F(S1.f32)): + D0.f32 = S1.f32 + elif ((S0.f32 > S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and not sign(S0.f32) and sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isSignalNAN(64'F(S1.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isQuietNAN(64'F(S0.f16)) then + # D0.f16 = S0.f16 + # elsif isQuietNAN(64'F(S1.f16)) then + # D0.f16 = S1.f16 + # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && + # !sign(S1.f16))) then + # // NOTE: -0<+0 is TRUE in 
this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f16)): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isSignalNAN(F(S1.f16)): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isQuietNAN(F(S0.f16)): + D0.f16 = S0.f16 + elif isQuietNAN(F(S1.f16)): + D0.f16 = S1.f16 + elif ((S0.f16 < S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and sign(S0.f16) and not sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isSignalNAN(64'F(S1.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isQuietNAN(64'F(S0.f16)) then + # D0.f16 = S0.f16 + # elsif isQuietNAN(64'F(S1.f16)) then + # D0.f16 = S1.f16 + # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && + # sign(S1.f16))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f16)): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isSignalNAN(F(S1.f16)): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isQuietNAN(F(S0.f16)): + D0.f16 = S0.f16 + elif isQuietNAN(F(S1.f16)): + D0.f16 = S1.f16 + elif ((S0.f16 > S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and not sign(S0.f16) and sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = S0.u64 + S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = S0.u64 + S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = S0.u64 - S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = S0.u64 - S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _SOP2Op_S_MUL_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = S0.u64 * S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = S0.u64 * S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +SOP2Op_FUNCTIONS = { + SOP2Op.S_ADD_CO_U32: _SOP2Op_S_ADD_CO_U32, + SOP2Op.S_SUB_CO_U32: _SOP2Op_S_SUB_CO_U32, + SOP2Op.S_ADD_CO_I32: _SOP2Op_S_ADD_CO_I32, + SOP2Op.S_SUB_CO_I32: _SOP2Op_S_SUB_CO_I32, + SOP2Op.S_ADD_CO_CI_U32: _SOP2Op_S_ADD_CO_CI_U32, + SOP2Op.S_SUB_CO_CI_U32: _SOP2Op_S_SUB_CO_CI_U32, + SOP2Op.S_ABSDIFF_I32: _SOP2Op_S_ABSDIFF_I32, + SOP2Op.S_LSHL_B32: 
_SOP2Op_S_LSHL_B32, + SOP2Op.S_LSHL_B64: _SOP2Op_S_LSHL_B64, + SOP2Op.S_LSHR_B32: _SOP2Op_S_LSHR_B32, + SOP2Op.S_LSHR_B64: _SOP2Op_S_LSHR_B64, + SOP2Op.S_ASHR_I32: _SOP2Op_S_ASHR_I32, + SOP2Op.S_ASHR_I64: _SOP2Op_S_ASHR_I64, + SOP2Op.S_LSHL1_ADD_U32: _SOP2Op_S_LSHL1_ADD_U32, + SOP2Op.S_LSHL2_ADD_U32: _SOP2Op_S_LSHL2_ADD_U32, + SOP2Op.S_LSHL3_ADD_U32: _SOP2Op_S_LSHL3_ADD_U32, + SOP2Op.S_LSHL4_ADD_U32: _SOP2Op_S_LSHL4_ADD_U32, + SOP2Op.S_MIN_I32: _SOP2Op_S_MIN_I32, + SOP2Op.S_MIN_U32: _SOP2Op_S_MIN_U32, + SOP2Op.S_MAX_I32: _SOP2Op_S_MAX_I32, + SOP2Op.S_MAX_U32: _SOP2Op_S_MAX_U32, + SOP2Op.S_AND_B32: _SOP2Op_S_AND_B32, + SOP2Op.S_AND_B64: _SOP2Op_S_AND_B64, + SOP2Op.S_OR_B32: _SOP2Op_S_OR_B32, + SOP2Op.S_OR_B64: _SOP2Op_S_OR_B64, + SOP2Op.S_XOR_B32: _SOP2Op_S_XOR_B32, + SOP2Op.S_XOR_B64: _SOP2Op_S_XOR_B64, + SOP2Op.S_NAND_B32: _SOP2Op_S_NAND_B32, + SOP2Op.S_NAND_B64: _SOP2Op_S_NAND_B64, + SOP2Op.S_NOR_B32: _SOP2Op_S_NOR_B32, + SOP2Op.S_NOR_B64: _SOP2Op_S_NOR_B64, + SOP2Op.S_XNOR_B32: _SOP2Op_S_XNOR_B32, + SOP2Op.S_XNOR_B64: _SOP2Op_S_XNOR_B64, + SOP2Op.S_AND_NOT1_B32: _SOP2Op_S_AND_NOT1_B32, + SOP2Op.S_AND_NOT1_B64: _SOP2Op_S_AND_NOT1_B64, + SOP2Op.S_OR_NOT1_B32: _SOP2Op_S_OR_NOT1_B32, + SOP2Op.S_OR_NOT1_B64: _SOP2Op_S_OR_NOT1_B64, + SOP2Op.S_BFE_U32: _SOP2Op_S_BFE_U32, + SOP2Op.S_BFE_I32: _SOP2Op_S_BFE_I32, + SOP2Op.S_BFE_U64: _SOP2Op_S_BFE_U64, + SOP2Op.S_BFE_I64: _SOP2Op_S_BFE_I64, + SOP2Op.S_BFM_B32: _SOP2Op_S_BFM_B32, + SOP2Op.S_BFM_B64: _SOP2Op_S_BFM_B64, + SOP2Op.S_MUL_I32: _SOP2Op_S_MUL_I32, + SOP2Op.S_MUL_HI_U32: _SOP2Op_S_MUL_HI_U32, + SOP2Op.S_MUL_HI_I32: _SOP2Op_S_MUL_HI_I32, + SOP2Op.S_CSELECT_B32: _SOP2Op_S_CSELECT_B32, + SOP2Op.S_CSELECT_B64: _SOP2Op_S_CSELECT_B64, + SOP2Op.S_PACK_LL_B32_B16: _SOP2Op_S_PACK_LL_B32_B16, + SOP2Op.S_PACK_LH_B32_B16: _SOP2Op_S_PACK_LH_B32_B16, + SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, + SOP2Op.S_PACK_HL_B32_B16: _SOP2Op_S_PACK_HL_B32_B16, + SOP2Op.S_ADD_F32: _SOP2Op_S_ADD_F32, + SOP2Op.S_SUB_F32: _SOP2Op_S_SUB_F32, + SOP2Op.S_MIN_NUM_F32: _SOP2Op_S_MIN_NUM_F32, + SOP2Op.S_MAX_NUM_F32: _SOP2Op_S_MAX_NUM_F32, + SOP2Op.S_MUL_F32: _SOP2Op_S_MUL_F32, + SOP2Op.S_FMAAK_F32: _SOP2Op_S_FMAAK_F32, + SOP2Op.S_FMAMK_F32: _SOP2Op_S_FMAMK_F32, + SOP2Op.S_FMAC_F32: _SOP2Op_S_FMAC_F32, + SOP2Op.S_CVT_PK_RTZ_F16_F32: _SOP2Op_S_CVT_PK_RTZ_F16_F32, + SOP2Op.S_ADD_F16: _SOP2Op_S_ADD_F16, + SOP2Op.S_SUB_F16: _SOP2Op_S_SUB_F16, + SOP2Op.S_MIN_NUM_F16: _SOP2Op_S_MIN_NUM_F16, + SOP2Op.S_MAX_NUM_F16: _SOP2Op_S_MAX_NUM_F16, + SOP2Op.S_MUL_F16: _SOP2Op_S_MUL_F16, + SOP2Op.S_FMAC_F16: _SOP2Op_S_FMAC_F16, + SOP2Op.S_MINIMUM_F32: _SOP2Op_S_MINIMUM_F32, + SOP2Op.S_MAXIMUM_F32: _SOP2Op_S_MAXIMUM_F32, + SOP2Op.S_MINIMUM_F16: _SOP2Op_S_MINIMUM_F16, + SOP2Op.S_MAXIMUM_F16: _SOP2Op_S_MAXIMUM_F16, + SOP2Op.S_ADD_NC_U64: _SOP2Op_S_ADD_NC_U64, + SOP2Op.S_SUB_NC_U64: _SOP2Op_S_SUB_NC_U64, + SOP2Op.S_MUL_U64: _SOP2Op_S_MUL_U64, +} + +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 == S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 == S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 <> S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 != S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': 
SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 > S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 > S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 >= S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 >= S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 < S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 < S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.i32 <= S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.i32 <= S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 == S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 == S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 <> S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 != S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 > S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 > S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 >= S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 >= S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 < S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 < S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32 <= S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32 <= S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = 
S0.u32[S1.u32[4 : 0]] == 1'0U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64 == S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64 == S1.u64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.u64 <> S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.u64 != S1.u64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f32 < S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f32 < S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f16 < S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f16 < S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f32 == S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f32 == S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f16 == S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f16 == S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f32 <= S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) 
+ # --- compiled pseudocode --- + SCC = Reg(S0.f32 <= S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f16 <= S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f16 <= S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f32 > S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f32 > S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f16 > S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f16 > S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f32 <> S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f32 != S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f16 <> S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f16 != S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f32 >= S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f32 >= S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = S0.f16 >= S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(S0.f16 >= S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg(( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- 
compiled pseudocode --- + SCC = Reg((isNAN(F(S0.f32)) or isNAN(F(S1.f32)))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg((isNAN(F(S0.f16)) or isNAN(F(S1.f16)))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f32 >= S1.f32); + # // With NAN inputs this is not the same operation as < + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f32 >= S1.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f16 >= S1.f16); + # // With NAN inputs this is not the same operation as < + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f16 >= S1.f16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f32 <> S1.f32); + # // With NAN inputs this is not the same operation as == + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f32 != S1.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f16 <> S1.f16); + # // With NAN inputs this is not the same operation as == + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f16 != S1.f16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f32 > S1.f32); + # // With NAN inputs this is not the same operation as <= + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f32 > S1.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f16 > S1.f16); + # // With NAN inputs this is not the same operation as <= + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f16 > S1.f16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f32 <= S1.f32); + # // With NAN inputs this is not the same operation as > + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f32 <= S1.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC 
= !(S0.f16 <= S1.f16); + # // With NAN inputs this is not the same operation as > + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f16 <= S1.f16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f32 == S1.f32); + # // With NAN inputs this is not the same operation as != + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f32 == S1.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f16 == S1.f16); + # // With NAN inputs this is not the same operation as != + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f16 == S1.f16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f32 < S1.f32); + # // With NAN inputs this is not the same operation as >= + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f32 < S1.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # SCC = !(S0.f16 < S1.f16); + # // With NAN inputs this is not the same operation as >= + S0 = Reg(s0) + S1 = Reg(s1) + SCC = Reg(scc) + # --- compiled pseudocode --- + SCC = Reg( not (S0.f16 < S1.f16)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + return result + +SOPCOp_FUNCTIONS = { + SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, + SOPCOp.S_CMP_LG_I32: _SOPCOp_S_CMP_LG_I32, + SOPCOp.S_CMP_GT_I32: _SOPCOp_S_CMP_GT_I32, + SOPCOp.S_CMP_GE_I32: _SOPCOp_S_CMP_GE_I32, + SOPCOp.S_CMP_LT_I32: _SOPCOp_S_CMP_LT_I32, + SOPCOp.S_CMP_LE_I32: _SOPCOp_S_CMP_LE_I32, + SOPCOp.S_CMP_EQ_U32: _SOPCOp_S_CMP_EQ_U32, + SOPCOp.S_CMP_LG_U32: _SOPCOp_S_CMP_LG_U32, + SOPCOp.S_CMP_GT_U32: _SOPCOp_S_CMP_GT_U32, + SOPCOp.S_CMP_GE_U32: _SOPCOp_S_CMP_GE_U32, + SOPCOp.S_CMP_LT_U32: _SOPCOp_S_CMP_LT_U32, + SOPCOp.S_CMP_LE_U32: _SOPCOp_S_CMP_LE_U32, + SOPCOp.S_BITCMP0_B32: _SOPCOp_S_BITCMP0_B32, + SOPCOp.S_BITCMP1_B32: _SOPCOp_S_BITCMP1_B32, + SOPCOp.S_BITCMP0_B64: _SOPCOp_S_BITCMP0_B64, + SOPCOp.S_BITCMP1_B64: _SOPCOp_S_BITCMP1_B64, + SOPCOp.S_CMP_EQ_U64: _SOPCOp_S_CMP_EQ_U64, + SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64, + SOPCOp.S_CMP_LT_F32: _SOPCOp_S_CMP_LT_F32, + SOPCOp.S_CMP_LT_F16: _SOPCOp_S_CMP_LT_F16, + SOPCOp.S_CMP_EQ_F32: _SOPCOp_S_CMP_EQ_F32, + SOPCOp.S_CMP_EQ_F16: _SOPCOp_S_CMP_EQ_F16, + SOPCOp.S_CMP_LE_F32: _SOPCOp_S_CMP_LE_F32, + SOPCOp.S_CMP_LE_F16: _SOPCOp_S_CMP_LE_F16, + SOPCOp.S_CMP_GT_F32: _SOPCOp_S_CMP_GT_F32, + SOPCOp.S_CMP_GT_F16: _SOPCOp_S_CMP_GT_F16, + SOPCOp.S_CMP_LG_F32: _SOPCOp_S_CMP_LG_F32, + SOPCOp.S_CMP_LG_F16: _SOPCOp_S_CMP_LG_F16, + SOPCOp.S_CMP_GE_F32: _SOPCOp_S_CMP_GE_F32, + SOPCOp.S_CMP_GE_F16: _SOPCOp_S_CMP_GE_F16, + SOPCOp.S_CMP_O_F32: _SOPCOp_S_CMP_O_F32, + SOPCOp.S_CMP_O_F16: _SOPCOp_S_CMP_O_F16, + SOPCOp.S_CMP_U_F32: _SOPCOp_S_CMP_U_F32, + SOPCOp.S_CMP_U_F16: _SOPCOp_S_CMP_U_F16, + SOPCOp.S_CMP_NGE_F32: _SOPCOp_S_CMP_NGE_F32, + SOPCOp.S_CMP_NGE_F16: 
_SOPCOp_S_CMP_NGE_F16, + SOPCOp.S_CMP_NLG_F32: _SOPCOp_S_CMP_NLG_F32, + SOPCOp.S_CMP_NLG_F16: _SOPCOp_S_CMP_NLG_F16, + SOPCOp.S_CMP_NGT_F32: _SOPCOp_S_CMP_NGT_F32, + SOPCOp.S_CMP_NGT_F16: _SOPCOp_S_CMP_NGT_F16, + SOPCOp.S_CMP_NLE_F32: _SOPCOp_S_CMP_NLE_F32, + SOPCOp.S_CMP_NLE_F16: _SOPCOp_S_CMP_NLE_F16, + SOPCOp.S_CMP_NEQ_F32: _SOPCOp_S_CMP_NEQ_F32, + SOPCOp.S_CMP_NEQ_F16: _SOPCOp_S_CMP_NEQ_F16, + SOPCOp.S_CMP_NLT_F32: _SOPCOp_S_CMP_NLT_F32, + SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, +} + +def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i16)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // Do nothing - for use by tools only + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if SCC then + # D0.i32 = 32'I(signext(S0.i16)) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + # --- compiled pseudocode --- + if SCC: + D0.i32 = (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.i32; + # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); + # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); + S0 = Reg(s0) + D0 = Reg(d0) + SCC = Reg(scc) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.i32) + D0.i32 = D0.i32 + (signext(S0.i16)) + SCC = Reg(((tmp[31] == S0.i16[15]) and (tmp[31] != D0.i32[31]))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': SCC._val & 1} + return result + +def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = D0.i32 * (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +SOPKOp_FUNCTIONS = { + SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, + SOPKOp.S_VERSION: _SOPKOp_S_VERSION, + SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32, + SOPKOp.S_ADDK_CO_I32: _SOPKOp_S_ADDK_CO_I32, + SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32, +} + +def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # for i in 0U : SIMM16.u16[3 : 0].u32 do + # endfor + SIMM16 = Reg(literal) + # --- compiled pseudocode --- + for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): + pass + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # instruction may be omitted. 
For wave64 the compiler may not know the status of the EXEC mask and hence + # // 1 cycle delay here + # // 2 cycles delay here + EXEC = Reg(exec_mask) + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // PC passed into trap handler points to S_TRAP itself, + # // trap base address + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // barrierBit 0: reserved + # // barrierBit 1: workgroup + # // barrierBit 2: trap + # // Implemented as a power-saving idle + # --- compiled pseudocode --- + + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +SOPPOp_FUNCTIONS = { + SOPPOp.S_NOP: _SOPPOp_S_NOP, + SOPPOp.S_DELAY_ALU: _SOPPOp_S_DELAY_ALU, + SOPPOp.S_TRAP: _SOPPOp_S_TRAP, + SOPPOp.S_BARRIER_WAIT: _SOPPOp_S_BARRIER_WAIT, +} + +def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b32 = S0.b32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b32 = S0.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare lane : 32'U; + # if WAVE64 then + # // 64 lanes + # if EXEC == 0x0LL then + # lane = 0U; + # // Force lane 0 if all lanes are disabled + # else + # lane = 32'U(s_ff1_i32_b64(EXEC)); + # // Lowest active lane + # endif + # else + # // 32 lanes + # if EXEC_LO.i32 == 0 then + # lane = 0U; + # // Force lane 0 if all lanes are disabled + # else + # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); + # // Lowest active lane + # endif + # endif; + # D0.b32 = VGPR[lane][SRC0.u32] + D0 = Reg(d0) + EXEC = Reg(exec_mask) + SRC0 = Reg(src0_idx) + EXEC_LO = SliceProxy(EXEC, 31, 0) + # --- compiled pseudocode --- + if WAVE64: + if EXEC == 0x0: + lane = 0 + else: + lane = (s_ff1_i32_b64(EXEC)) + else: + if EXEC_LO.i32 == 0: + lane = 0 + else: + lane = (s_ff1_i32_b32(EXEC_LO)) + D0.b32 = VGPR[lane][SRC0.u32] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f64_to_i32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f64_to_i32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = i32_to_f64(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = i32_to_f64(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = i32_to_f32(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = i32_to_f32(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + 
return result + +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f32_to_u32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f32_to_u32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = f32_to_f16(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = f32_to_f16(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f16_to_f32(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f16_to_f32(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f64_to_f32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f64_to_f32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = f32_to_f64(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = f32_to_f64(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[7 : 0].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[7 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[15 : 
8].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[15 : 8].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[23 : 16].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[23 : 16].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[31 : 24].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[31 : 24].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f64_to_u32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f64_to_u32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = u32_to_f64(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = u32_to_f64(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): + D0.f64 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = floor(S0.f64 + 0.5); + # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then + # D0.f64 -= 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = floor(S0.f64 + 0.5) + if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): + D0.f64 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += -1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): + D0.f64 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b16 = S0.b16 + S0 = 
Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b16 = S0.b16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + -floor(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + -floor(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): + D0.f32 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = floor(S0.f32 + 0.5F); + # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then + # D0.f32 -= 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = floor(S0.f32 + 0.5) + if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): + D0.f32 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += -1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): + D0.f32 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = pow(2.0F, S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = pow(2.0, S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = log2(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = log2(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32; + # // Can only raise integer DIV_BY_ZERO exception + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + 
return result + +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / S0.f64 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / S0.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sin(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = cos(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~S0.u32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32[31 : 0] = S0.u32[0 : 31] + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32[31 : 0] = S0.u32[0 : 31] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from MSB + # if S0.u32[31 - i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(0, int(31)+1): + if S0.u32[31 - i] == 1: + D0.i32 = i; break + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from LSB + # if S0.u32[i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(0, int(31)+1): + if S0.u32[i] == 1: + D0.i32 = i; break + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if all bits are the same + # for i in 1 : 31 do + # // Search from MSB + # if S0.i32[31 - i] != S0.i32[31] then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(1, int(31)+1): + if S0.i32[31 - i] != S0.i32[31]: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f64) - 1023 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.i32 = 0 + else: + D0.i32 = exponent(S0.f64) - 1023 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.f64 = S0.f64 + # else + # D0.f64 = mantissa(S0.f64) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.f64 = S0.f64 + else: + D0.f64 = mantissa(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 + -floor(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 + -floor(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f32) - 127 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.i32 = 0 + else: + D0.i32 = exponent(S0.f32) - 127 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.f32 = S0.f32 + # else + # D0.f32 = mantissa(S0.f32) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = mantissa(S0.f32) + # --- end pseudocode --- + 
result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # addr = SRC0.u32; + # // Raw value from instruction + # D0.b32 = VGPR[laneId][addr].b32 + D0 = Reg(d0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + addr = SRC0.u32 + D0.b32 = VGPR[laneId][addr].b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = u16_to_f16(S0.u16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = u16_to_f16(S0.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = i16_to_f16(S0.i16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = i16_to_f16(S0.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = f16_to_u16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = f16_to_u16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = f16_to_i16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = f16_to_i16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / S0.f16 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = log2(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = log2(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = pow(16'2.0, S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = pow(2.0, S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f16) == +INF) 
|| (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then + # D0.f16 = S0.f16 + # else + # D0.f16 = mantissa(S0.f16) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = mantissa(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then + # D0.i16 = 16'0 + # else + # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): + D0.i16 = 0 + else: + D0.i16 = (exponent(S0.f16) - 15 + 1) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += -16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): + D0.f16 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): + D0.f16 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = floor(S0.f16 + 16'0.5); + # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then + # D0.f16 -= 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = floor(S0.f16 + 0.5) + if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): + D0.f16 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + -floor(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + -floor(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sin(S0.f16 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = cos(S0.f16 * 
16'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = cos(S0.f16 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if n <= 16'0 then + # elsif n >= 16'255 then + # else + # endif); + # tmp = 16'0; + # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); + # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); + # D0.b16 = tmp.b16 + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + if n <= 0: + pass + elif n >= 255: + pass + else: + pass + tmp = Reg(0) + tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) + tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) + D0.b16 = tmp.b16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = f16_to_snorm(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = f16_to_snorm(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = f16_to_unorm(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = f16_to_unorm(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.b32; + # D0.b32 = S0.b32; + # S0.b32 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.b32) + D0.b32 = S0.b32 + S0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = D0.b16; + # D0.b16 = S0.b16; + # S0.b16 = tmp + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(D0.b16) + D0.b16 = S0.b16 + S0.b16 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = ~S0.u16 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ~S0.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i16)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { 16'0, S0.u16 } + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(0, S0.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if OPSEL[1 : 0].u2 == 2'0U then + # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) + # elsif OPSEL[1 : 0].u2 == 2'2U then + # // Byte select bits are reversed + # 
D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) + # elsif OPSEL[1 : 0].u2 == 2'1U then + # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) + # else + # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) + # endif + D0 = Reg(d0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if OPSEL[1 : 0].u2 == 0: + D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) + elif OPSEL[1 : 0].u2 == 2: + D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) + elif OPSEL[1 : 0].u2 == 1: + D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) + else: + D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if OPSEL[1 : 0].u2 == 2'0U then + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) + # elsif OPSEL[1 : 0].u2 == 2'2U then + # // Byte select bits are reversed + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) + # elsif OPSEL[1 : 0].u2 == 2'1U then + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) + # else + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) + # endif + D0 = Reg(d0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if OPSEL[1 : 0].u2 == 0: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) + elif OPSEL[1 : 0].u2 == 2: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) + elif OPSEL[1 : 0].u2 == 1: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) + else: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; + # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); + # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) + D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) + D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = OPSEL[0].u1 ? 
VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; + # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); + # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) + D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) + D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +VOP1Op_FUNCTIONS = { + VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, + VOP1Op.V_READFIRSTLANE_B32: _VOP1Op_V_READFIRSTLANE_B32, + VOP1Op.V_CVT_I32_F64: _VOP1Op_V_CVT_I32_F64, + VOP1Op.V_CVT_F64_I32: _VOP1Op_V_CVT_F64_I32, + VOP1Op.V_CVT_F32_I32: _VOP1Op_V_CVT_F32_I32, + VOP1Op.V_CVT_F32_U32: _VOP1Op_V_CVT_F32_U32, + VOP1Op.V_CVT_U32_F32: _VOP1Op_V_CVT_U32_F32, + VOP1Op.V_CVT_I32_F32: _VOP1Op_V_CVT_I32_F32, + VOP1Op.V_CVT_F16_F32: _VOP1Op_V_CVT_F16_F32, + VOP1Op.V_CVT_F32_F16: _VOP1Op_V_CVT_F32_F16, + VOP1Op.V_CVT_NEAREST_I32_F32: _VOP1Op_V_CVT_NEAREST_I32_F32, + VOP1Op.V_CVT_FLOOR_I32_F32: _VOP1Op_V_CVT_FLOOR_I32_F32, + VOP1Op.V_CVT_F32_F64: _VOP1Op_V_CVT_F32_F64, + VOP1Op.V_CVT_F64_F32: _VOP1Op_V_CVT_F64_F32, + VOP1Op.V_CVT_F32_UBYTE0: _VOP1Op_V_CVT_F32_UBYTE0, + VOP1Op.V_CVT_F32_UBYTE1: _VOP1Op_V_CVT_F32_UBYTE1, + VOP1Op.V_CVT_F32_UBYTE2: _VOP1Op_V_CVT_F32_UBYTE2, + VOP1Op.V_CVT_F32_UBYTE3: _VOP1Op_V_CVT_F32_UBYTE3, + VOP1Op.V_CVT_U32_F64: _VOP1Op_V_CVT_U32_F64, + VOP1Op.V_CVT_F64_U32: _VOP1Op_V_CVT_F64_U32, + VOP1Op.V_TRUNC_F64: _VOP1Op_V_TRUNC_F64, + VOP1Op.V_CEIL_F64: _VOP1Op_V_CEIL_F64, + VOP1Op.V_RNDNE_F64: _VOP1Op_V_RNDNE_F64, + VOP1Op.V_FLOOR_F64: _VOP1Op_V_FLOOR_F64, + VOP1Op.V_MOV_B16: _VOP1Op_V_MOV_B16, + VOP1Op.V_FRACT_F32: _VOP1Op_V_FRACT_F32, + VOP1Op.V_TRUNC_F32: _VOP1Op_V_TRUNC_F32, + VOP1Op.V_CEIL_F32: _VOP1Op_V_CEIL_F32, + VOP1Op.V_RNDNE_F32: _VOP1Op_V_RNDNE_F32, + VOP1Op.V_FLOOR_F32: _VOP1Op_V_FLOOR_F32, + VOP1Op.V_EXP_F32: _VOP1Op_V_EXP_F32, + VOP1Op.V_LOG_F32: _VOP1Op_V_LOG_F32, + VOP1Op.V_RCP_F32: _VOP1Op_V_RCP_F32, + VOP1Op.V_RCP_IFLAG_F32: _VOP1Op_V_RCP_IFLAG_F32, + VOP1Op.V_RSQ_F32: _VOP1Op_V_RSQ_F32, + VOP1Op.V_RCP_F64: _VOP1Op_V_RCP_F64, + VOP1Op.V_RSQ_F64: _VOP1Op_V_RSQ_F64, + VOP1Op.V_SQRT_F32: _VOP1Op_V_SQRT_F32, + VOP1Op.V_SQRT_F64: _VOP1Op_V_SQRT_F64, + VOP1Op.V_SIN_F32: _VOP1Op_V_SIN_F32, + VOP1Op.V_COS_F32: _VOP1Op_V_COS_F32, + VOP1Op.V_NOT_B32: _VOP1Op_V_NOT_B32, + VOP1Op.V_BFREV_B32: _VOP1Op_V_BFREV_B32, + VOP1Op.V_CLZ_I32_U32: _VOP1Op_V_CLZ_I32_U32, + VOP1Op.V_CTZ_I32_B32: _VOP1Op_V_CTZ_I32_B32, + VOP1Op.V_CLS_I32: _VOP1Op_V_CLS_I32, + VOP1Op.V_FREXP_EXP_I32_F64: _VOP1Op_V_FREXP_EXP_I32_F64, + VOP1Op.V_FREXP_MANT_F64: _VOP1Op_V_FREXP_MANT_F64, + VOP1Op.V_FRACT_F64: _VOP1Op_V_FRACT_F64, + VOP1Op.V_FREXP_EXP_I32_F32: _VOP1Op_V_FREXP_EXP_I32_F32, + VOP1Op.V_FREXP_MANT_F32: _VOP1Op_V_FREXP_MANT_F32, + VOP1Op.V_MOVRELS_B32: _VOP1Op_V_MOVRELS_B32, + VOP1Op.V_CVT_F16_U16: _VOP1Op_V_CVT_F16_U16, + VOP1Op.V_CVT_F16_I16: _VOP1Op_V_CVT_F16_I16, + VOP1Op.V_CVT_U16_F16: _VOP1Op_V_CVT_U16_F16, + VOP1Op.V_CVT_I16_F16: _VOP1Op_V_CVT_I16_F16, + VOP1Op.V_RCP_F16: _VOP1Op_V_RCP_F16, + VOP1Op.V_SQRT_F16: _VOP1Op_V_SQRT_F16, + VOP1Op.V_RSQ_F16: _VOP1Op_V_RSQ_F16, + VOP1Op.V_LOG_F16: _VOP1Op_V_LOG_F16, + VOP1Op.V_EXP_F16: _VOP1Op_V_EXP_F16, + VOP1Op.V_FREXP_MANT_F16: _VOP1Op_V_FREXP_MANT_F16, + VOP1Op.V_FREXP_EXP_I16_F16: _VOP1Op_V_FREXP_EXP_I16_F16, + VOP1Op.V_FLOOR_F16: _VOP1Op_V_FLOOR_F16, + VOP1Op.V_CEIL_F16: _VOP1Op_V_CEIL_F16, + 
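# Illustrative usage of the handlers collected in this table (a minimal sketch, assuming
# the Reg wrapper and the conversion helpers used above store results as raw 32-bit
# register values; the emulator in emu.py is the real caller):
#   handler = VOP1Op_FUNCTIONS[VOP1Op.V_CVT_F32_U32]
#   out = handler(s0=7, s1=0, s2=0, d0=0, scc=0, vcc=0, lane=0,
#                 exec_mask=1, literal=0, VGPR=None, _vars={})
#   # out['d0'] should hold 0x40E00000, the IEEE-754 bits of 7.0f. Handlers with a
#   # 64-bit destination additionally set out['d0_64'], and carry-writing handlers
#   # report the per-lane carry via out['vcc_lane'].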
VOP1Op.V_TRUNC_F16: _VOP1Op_V_TRUNC_F16, + VOP1Op.V_RNDNE_F16: _VOP1Op_V_RNDNE_F16, + VOP1Op.V_FRACT_F16: _VOP1Op_V_FRACT_F16, + VOP1Op.V_SIN_F16: _VOP1Op_V_SIN_F16, + VOP1Op.V_COS_F16: _VOP1Op_V_COS_F16, + VOP1Op.V_SAT_PK_U8_I16: _VOP1Op_V_SAT_PK_U8_I16, + VOP1Op.V_CVT_NORM_I16_F16: _VOP1Op_V_CVT_NORM_I16_F16, + VOP1Op.V_CVT_NORM_U16_F16: _VOP1Op_V_CVT_NORM_U16_F16, + VOP1Op.V_SWAP_B32: _VOP1Op_V_SWAP_B32, + VOP1Op.V_SWAP_B16: _VOP1Op_V_SWAP_B16, + VOP1Op.V_NOT_B16: _VOP1Op_V_NOT_B16, + VOP1Op.V_CVT_I32_I16: _VOP1Op_V_CVT_I32_I16, + VOP1Op.V_CVT_U32_U16: _VOP1Op_V_CVT_U32_U16, + VOP1Op.V_CVT_F32_FP8: _VOP1Op_V_CVT_F32_FP8, + VOP1Op.V_CVT_F32_BF8: _VOP1Op_V_CVT_F32_BF8, + VOP1Op.V_CVT_PK_F32_FP8: _VOP1Op_V_CVT_PK_F32_FP8, + VOP1Op.V_CVT_PK_F32_BF8: _VOP1Op_V_CVT_PK_F32_BF8, +} + +def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 + S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 + S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 - S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 - S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S1.f32 - S0.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S1.f32 - S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 * S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 * S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then + # // DX9 rules, 0.0 * x = 0.0 + # D0.f32 = 0.0F + # else + # D0.f32 = S0.f32 * S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): + D0.f32 = 0.0 + else: + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 * S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i24) * (S1.i24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (((S0.i24) * (S1.i24)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u24) * (S1.u24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u24) * (S1.u24)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(S0.f64) && isNAN(S1.f64)) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif isNAN(S0.f64) then + # D0.f64 = S1.f64 + # elsif isNAN(S1.f64) then + # D0.f64 = S0.f64 + # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && + # !sign(S1.f64))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f64 = S0.f64 + # else + # D0.f64 = S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): + TRAPSTS.INVALID = 1 + if (isNAN(S0.f64) and isNAN(S1.f64)): + D0.f64 = cvtToQuietNAN(S0.f64) + elif isNAN(S0.f64): + D0.f64 = S1.f64 + elif isNAN(S1.f64): + D0.f64 = S0.f64 + elif ((S0.f64 < S1.f64) or ((abs(S0.f64) == 0.0) and (abs(S1.f64) == 0.0) and sign(S0.f64) and not sign(S1.f64))): + D0.f64 = S0.f64 + else: + D0.f64 = S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(S0.f64) && isNAN(S1.f64)) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif isNAN(S0.f64) then + # D0.f64 = S1.f64 + # elsif isNAN(S1.f64) then + # D0.f64 = S0.f64 + # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && + # sign(S1.f64))) then + # // NOTE: +0>-0 is TRUE in this comparison + # 
D0.f64 = S0.f64 + # else + # D0.f64 = S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): + TRAPSTS.INVALID = 1 + if (isNAN(S0.f64) and isNAN(S1.f64)): + D0.f64 = cvtToQuietNAN(S0.f64) + elif isNAN(S0.f64): + D0.f64 = S1.f64 + elif isNAN(S1.f64): + D0.f64 = S0.f64 + elif ((S0.f64 > S1.f64) or ((abs(S0.f64) == 0.0) and (abs(S1.f64) == 0.0) and not sign(S0.f64) and sign(S1.f64))): + D0.f64 = S0.f64 + else: + D0.f64 = S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && + # !sign(S1.f32))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((S0.f32 < S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and sign(S0.f32) and not sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && + # sign(S1.f32))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((S0.f32 > S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and not sign(S0.f32) and sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 << S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 << S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + 
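# Worked example for the "REV"-style shift handlers in this group (grounded in the
# pseudocode shown: S0 carries the shift amount, S1 the value being shifted, and only
# S0[4:0] is used for 32-bit shifts, S0[5:0] for V_LSHLREV_B64):
#   V_LSHLREV_B32 with s0 = 33, s1 = 1:
#     shift = 33 & 0x1f                    # 33 wraps to 1
#     d0    = (1 << shift) & 0xffffffff    # -> 2, not 0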
# D0.i32 = (S1.i32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S1.i32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S1.u64 << S0[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (S1.u64 << S0[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; + # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
+ # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 + S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 + S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 - S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 - S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S1.u32 - S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S1.u32 - S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, D0.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, D0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # prev_mode = ROUND_MODE; + # tmp[15 : 0].f16 = 
f32_to_f16(S0.f32); + # tmp[31 : 16].f16 = f32_to_f16(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + prev_mode = ROUND_MODE + tmp[15 : 0].f16 = f32_to_f16(S0.f32) + tmp[31 : 16].f16 = f32_to_f16(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && + # !sign(S1.f16))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((S0.f16 < S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and sign(S0.f16) and not sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && + # sign(S1.f16))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((S0.f16 > S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and not sign(S0.f16) and sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 - S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 - S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S1.f16 - S0.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S1.f16 - S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = fma(S0.f16, S1.f16, D0.f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = fma(S0.f16, S1.f16, D0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + SIMM32 = Reg(literal) + # --- compiled pseudocode --- + D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16); + # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) + D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +VOP2Op_FUNCTIONS = { + VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, + VOP2Op.V_ADD_F64: _VOP2Op_V_ADD_F64, + VOP2Op.V_ADD_F32: _VOP2Op_V_ADD_F32, + VOP2Op.V_SUB_F32: _VOP2Op_V_SUB_F32, + VOP2Op.V_SUBREV_F32: _VOP2Op_V_SUBREV_F32, + VOP2Op.V_MUL_F64: _VOP2Op_V_MUL_F64, + VOP2Op.V_MUL_DX9_ZERO_F32: _VOP2Op_V_MUL_DX9_ZERO_F32, + VOP2Op.V_MUL_F32: _VOP2Op_V_MUL_F32, + VOP2Op.V_MUL_I32_I24: _VOP2Op_V_MUL_I32_I24, + VOP2Op.V_MUL_HI_I32_I24: _VOP2Op_V_MUL_HI_I32_I24, + VOP2Op.V_MUL_U32_U24: _VOP2Op_V_MUL_U32_U24, + VOP2Op.V_MUL_HI_U32_U24: _VOP2Op_V_MUL_HI_U32_U24, + VOP2Op.V_MIN_NUM_F64: _VOP2Op_V_MIN_NUM_F64, + VOP2Op.V_MAX_NUM_F64: _VOP2Op_V_MAX_NUM_F64, + VOP2Op.V_MIN_I32: _VOP2Op_V_MIN_I32, + VOP2Op.V_MAX_I32: _VOP2Op_V_MAX_I32, + VOP2Op.V_MIN_U32: _VOP2Op_V_MIN_U32, + VOP2Op.V_MAX_U32: _VOP2Op_V_MAX_U32, + VOP2Op.V_MIN_NUM_F32: _VOP2Op_V_MIN_NUM_F32, 
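# Dispatch sketch for this table (illustrative only; emu.py is assumed to drive these
# handlers once per active lane and to write 'd0'/'vcc_lane' back to the register file):
#   handler = VOP2Op_FUNCTIONS[VOP2Op.V_ADD_F32]
#   out = handler(s0=0x3F800000, s1=0x40000000, s2=0, d0=0, scc=0, vcc=0,
#                 lane=0, exec_mask=1, literal=0, VGPR=None, _vars={})
#   # out['d0'] should hold 0x40400000 (1.0f + 2.0f == 3.0f), assuming the Reg wrapper
#   # keeps f32 results as their raw bit pattern.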
+ VOP2Op.V_MAX_NUM_F32: _VOP2Op_V_MAX_NUM_F32, + VOP2Op.V_LSHLREV_B32: _VOP2Op_V_LSHLREV_B32, + VOP2Op.V_LSHRREV_B32: _VOP2Op_V_LSHRREV_B32, + VOP2Op.V_ASHRREV_I32: _VOP2Op_V_ASHRREV_I32, + VOP2Op.V_AND_B32: _VOP2Op_V_AND_B32, + VOP2Op.V_OR_B32: _VOP2Op_V_OR_B32, + VOP2Op.V_XOR_B32: _VOP2Op_V_XOR_B32, + VOP2Op.V_XNOR_B32: _VOP2Op_V_XNOR_B32, + VOP2Op.V_LSHLREV_B64: _VOP2Op_V_LSHLREV_B64, + VOP2Op.V_ADD_CO_CI_U32: _VOP2Op_V_ADD_CO_CI_U32, + VOP2Op.V_SUB_CO_CI_U32: _VOP2Op_V_SUB_CO_CI_U32, + VOP2Op.V_SUBREV_CO_CI_U32: _VOP2Op_V_SUBREV_CO_CI_U32, + VOP2Op.V_ADD_NC_U32: _VOP2Op_V_ADD_NC_U32, + VOP2Op.V_SUB_NC_U32: _VOP2Op_V_SUB_NC_U32, + VOP2Op.V_SUBREV_NC_U32: _VOP2Op_V_SUBREV_NC_U32, + VOP2Op.V_FMAC_F32: _VOP2Op_V_FMAC_F32, + VOP2Op.V_FMAMK_F32: _VOP2Op_V_FMAMK_F32, + VOP2Op.V_FMAAK_F32: _VOP2Op_V_FMAAK_F32, + VOP2Op.V_CVT_PK_RTZ_F16_F32: _VOP2Op_V_CVT_PK_RTZ_F16_F32, + VOP2Op.V_MIN_NUM_F16: _VOP2Op_V_MIN_NUM_F16, + VOP2Op.V_MAX_NUM_F16: _VOP2Op_V_MAX_NUM_F16, + VOP2Op.V_ADD_F16: _VOP2Op_V_ADD_F16, + VOP2Op.V_SUB_F16: _VOP2Op_V_SUB_F16, + VOP2Op.V_SUBREV_F16: _VOP2Op_V_SUBREV_F16, + VOP2Op.V_MUL_F16: _VOP2Op_V_MUL_F16, + VOP2Op.V_FMAC_F16: _VOP2Op_V_FMAC_F16, + VOP2Op.V_FMAMK_F16: _VOP2Op_V_FMAMK_F16, + VOP2Op.V_FMAAK_F16: _VOP2Op_V_FMAAK_F16, + VOP2Op.V_LDEXP_F16: _VOP2Op_V_LDEXP_F16, + VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, +} + +def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b32 = S0.b32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b32 = S0.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare lane : 32'U; + # if WAVE64 then + # // 64 lanes + # if EXEC == 0x0LL then + # lane = 0U; + # // Force lane 0 if all lanes are disabled + # else + # lane = 32'U(s_ff1_i32_b64(EXEC)); + # // Lowest active lane + # endif + # else + # // 32 lanes + # if EXEC_LO.i32 == 0 then + # lane = 0U; + # // Force lane 0 if all lanes are disabled + # else + # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); + # // Lowest active lane + # endif + # endif; + # D0.b32 = VGPR[lane][SRC0.u32] + D0 = Reg(d0) + EXEC = Reg(exec_mask) + SRC0 = Reg(src0_idx) + EXEC_LO = SliceProxy(EXEC, 31, 0) + # --- compiled pseudocode --- + if WAVE64: + if EXEC == 0x0: + lane = 0 + else: + lane = (s_ff1_i32_b64(EXEC)) + else: + if EXEC_LO.i32 == 0: + lane = 0 + else: + lane = (s_ff1_i32_b32(EXEC_LO)) + D0.b32 = VGPR[lane][SRC0.u32] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + return result + +def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f64_to_i32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f64_to_i32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = i32_to_f64(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = i32_to_f64(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 
i32_to_f32(S0.i32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = i32_to_f32(S0.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f32_to_u32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f32_to_u32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = f32_to_f16(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = f32_to_f16(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f16_to_f32(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f16_to_f32(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = f32_to_i32(floor(S0.f32)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = f32_to_i32(floor(S0.f32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = f64_to_f32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = f64_to_f32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = f32_to_f64(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = f32_to_f64(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[7 : 0].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[7 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 
'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[15 : 8].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[15 : 8].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[23 : 16].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[23 : 16].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = u32_to_f32(S0[31 : 24].u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = u32_to_f32(S0[31 : 24].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = f64_to_u32(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = f64_to_u32(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = u32_to_f64(S0.u32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = u32_to_f64(S0.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): + D0.f64 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = floor(S0.f64 + 0.5); + # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then + # D0.f64 -= 1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = floor(S0.f64 + 0.5) + if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): + D0.f64 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = trunc(S0.f64); + # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then + # D0.f64 += -1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = trunc(S0.f64) + if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): + D0.f64 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + 
result['d0_64'] = True + return result + +def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.b16 = S0.b16 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b16 = S0.b16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + -floor(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + -floor(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): + D0.f32 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = floor(S0.f32 + 0.5F); + # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then + # D0.f32 -= 1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = floor(S0.f32 + 0.5) + if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): + D0.f32 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = trunc(S0.f32); + # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then + # D0.f32 += -1.0F + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = trunc(S0.f32) + if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): + D0.f32 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = pow(2.0F, S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = pow(2.0, S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = log2(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = log2(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32; + # // Can only raise integer 
DIV_BY_ZERO exception + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / S0.f64 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / S0.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = 1.0 / sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = 1.0 / sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = sqrt(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = sqrt(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sin(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = cos(S0.f32 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~S0.u32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32[31 : 0] = S0.u32[0 : 31] + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32[31 : 0] = S0.u32[0 : 31] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from MSB + # if S0.u32[31 - i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i 
in range(0, int(31)+1): + if S0.u32[31 - i] == 1: + D0.i32 = i; break + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if no ones are found + # for i in 0 : 31 do + # // Search from LSB + # if S0.u32[i] == 1'1U then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(0, int(31)+1): + if S0.u32[i] == 1: + D0.i32 = i; break + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = -1; + # // Set if all bits are the same + # for i in 1 : 31 do + # // Search from MSB + # if S0.i32[31 - i] != S0.i32[31] then + # D0.i32 = i; + # endif + # endfor + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = -1 + for i in range(1, int(31)+1): + if S0.i32[31 - i] != S0.i32[31]: + D0.i32 = i + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f64) - 1023 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.i32 = 0 + else: + D0.i32 = exponent(S0.f64) - 1023 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then + # D0.f64 = S0.f64 + # else + # D0.f64 = mantissa(S0.f64) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): + D0.f64 = S0.f64 + else: + D0.f64 = mantissa(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 + -floor(S0.f64) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 + -floor(S0.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.i32 = 0 + # else + # D0.i32 = exponent(S0.f32) - 127 + 1 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.i32 = 0 + else: + D0.i32 = exponent(S0.f32) - 127 + 1 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then + # D0.f32 = S0.f32 + # else + # D0.f32 = mantissa(S0.f32) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode 
--- + if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = mantissa(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # addr = SRC0.u32; + # // Raw value from instruction + # D0.b32 = VGPR[laneId][addr].b32 + D0 = Reg(d0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + addr = SRC0.u32 + D0.b32 = VGPR[laneId][addr].b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = u16_to_f16(S0.u16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = u16_to_f16(S0.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = i16_to_f16(S0.i16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = i16_to_f16(S0.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = f16_to_u16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = f16_to_u16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = f16_to_i16(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = f16_to_i16(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / S0.f16 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / sqrt(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / sqrt(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = log2(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = log2(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = pow(16'2.0, S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = pow(2.0, S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return 
result + +def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then + # D0.f16 = S0.f16 + # else + # D0.f16 = mantissa(S0.f16) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = mantissa(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then + # D0.i16 = 16'0 + # else + # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): + D0.i16 = 0 + else: + D0.i16 = (exponent(S0.f16) - 15 + 1) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += -16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): + D0.f16 += -1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16); + # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then + # D0.f16 += 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): + D0.f16 += 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = trunc(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = trunc(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = floor(S0.f16 + 16'0.5); + # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then + # D0.f16 -= 16'1.0 + # endif + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = floor(S0.f16 + 0.5) + if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): + D0.f16 -= 1.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + -floor(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + -floor(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sin(S0.f16 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + 
return result + +def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = cos(S0.f16 * F(PI * 2.0)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if n <= 16'0 then + # elsif n >= 16'255 then + # else + # endif); + # tmp = 16'0; + # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); + # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); + # D0.b16 = tmp.b16 + S0 = Reg(s0) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + if n <= 0: + pass + elif n >= 255: + pass + else: + pass + tmp = Reg(0) + tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) + tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) + D0.b16 = tmp.b16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = f16_to_snorm(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = f16_to_snorm(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = f16_to_unorm(S0.f16) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = f16_to_unorm(S0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = ~S0.u16 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ~S0.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(signext(S0.i16)) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (signext(S0.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0 = { 16'0, S0.u16 } + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0 = Reg(_pack(0, S0.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if OPSEL[1 : 0].u2 == 2'0U then + # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) + # elsif OPSEL[1 : 0].u2 == 2'2U then + # // Byte select bits are reversed + # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) + # elsif OPSEL[1 : 0].u2 == 2'1U then + # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) + # else + # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) + # endif + D0 = Reg(d0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if OPSEL[1 : 0].u2 == 0: + D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) + elif OPSEL[1 : 0].u2 == 2: + D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) + elif OPSEL[1 : 0].u2 == 1: + D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) + else: + D0.f32 = 
fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if OPSEL[1 : 0].u2 == 2'0U then + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) + # elsif OPSEL[1 : 0].u2 == 2'2U then + # // Byte select bits are reversed + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) + # elsif OPSEL[1 : 0].u2 == 2'1U then + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) + # else + # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) + # endif + D0 = Reg(d0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if OPSEL[1 : 0].u2 == 0: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) + elif OPSEL[1 : 0].u2 == 2: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) + elif OPSEL[1 : 0].u2 == 1: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) + else: + D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; + # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); + # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) + D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) + D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; + # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); + # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) + D0 = Reg(d0) + tmp = Reg(0) + laneId = lane + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) + D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) + D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 + S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 + S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 + S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 + S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 - S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 - S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S1.f32 - S0.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S1.f32 - S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 * S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 * S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then + # // DX9 rules, 0.0 * x = 0.0 + # D0.f32 = 0.0F + # else + # D0.f32 = S0.f32 * S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): + D0.f32 = 0.0 + else: + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 * S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i24) * (S1.i24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 
= (((S0.i24) * (S1.i24)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u24) * (S1.u24) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u24) * (S1.u24)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(S0.f64) && isNAN(S1.f64)) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif isNAN(S0.f64) then + # D0.f64 = S1.f64 + # elsif isNAN(S1.f64) then + # D0.f64 = S0.f64 + # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && + # !sign(S1.f64))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f64 = S0.f64 + # else + # D0.f64 = S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): + TRAPSTS.INVALID = 1 + if (isNAN(S0.f64) and isNAN(S1.f64)): + D0.f64 = cvtToQuietNAN(S0.f64) + elif isNAN(S0.f64): + D0.f64 = S1.f64 + elif isNAN(S1.f64): + D0.f64 = S0.f64 + elif ((S0.f64 < S1.f64) or ((abs(S0.f64) == 0.0) and (abs(S1.f64) == 0.0) and sign(S0.f64) and not sign(S1.f64))): + D0.f64 = S0.f64 + else: + D0.f64 = S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(S0.f64) && isNAN(S1.f64)) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif isNAN(S0.f64) then + # D0.f64 = S1.f64 + # elsif isNAN(S1.f64) then + # D0.f64 = S0.f64 + # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && + # sign(S1.f64))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f64 = S0.f64 + # else + # D0.f64 = S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): + TRAPSTS.INVALID = 1 + if (isNAN(S0.f64) and isNAN(S1.f64)): + D0.f64 = cvtToQuietNAN(S0.f64) + elif isNAN(S0.f64): + D0.f64 = S1.f64 + elif isNAN(S1.f64): + D0.f64 = S0.f64 + elif ((S0.f64 > S1.f64) or ((abs(S0.f64) == 0.0) and (abs(S1.f64) == 0.0) and not sign(S0.f64) and sign(S1.f64))): + D0.f64 = S0.f64 + else: + D0.f64 = S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 < S1.i32 ? 
S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && + # !sign(S1.f32))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((S0.f32 < S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and sign(S0.f32) and not sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isNAN(64'F(S0.f32)) then + # D0.f32 = S1.f32 + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = S0.f32 + # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && + # sign(S1.f32))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif 
isNAN(F(S0.f32)): + D0.f32 = S1.f32 + elif isNAN(F(S1.f32)): + D0.f32 = S0.f32 + elif ((S0.f32 > S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and not sign(S0.f32) and sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 << S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 << S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S1.u32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S1.u32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = (S1.i32 >> S0[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S1.i32 >> S0[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 & S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 & S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ~(S0.u32 ^ S1.u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ~(S0.u32 ^ S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S1.u64 << S0[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (S1.u64 << S0[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 + S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 + S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 - S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 - S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S1.u32 - S0.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S1.u32 - S0.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, D0.f32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, D0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # prev_mode = ROUND_MODE; + # tmp[15 : 0].f16 = f32_to_f16(S0.f32); + # tmp[31 : 16].f16 = f32_to_f16(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + prev_mode = ROUND_MODE + tmp[15 : 0].f16 = f32_to_f16(S0.f32) + tmp[31 : 16].f16 = f32_to_f16(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && + # !sign(S1.f16))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((S0.f16 < S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and sign(S0.f16) and not sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isNAN(64'F(S0.f16)) then + # D0.f16 = S1.f16 + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = S0.f16 + # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && + # sign(S1.f16))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if (isNAN(F(S0.f16)) and 
isNAN(F(S1.f16))): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isNAN(F(S0.f16)): + D0.f16 = S1.f16 + elif isNAN(F(S1.f16)): + D0.f16 = S0.f16 + elif ((S0.f16 > S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and not sign(S0.f16) and sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 + S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 + S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 - S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 - S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S1.f16 - S0.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S1.f16 - S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = fma(S0.f16, S1.f16, D0.f16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = fma(S0.f16, S1.f16, D0.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then + # // DX9 rules, 0.0 * x = 0.0 + # D0.f32 = S2.f32 + # else + # D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): + D0.f32 = S2.f32 + else: + D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i24) * (S1.i24) + S2.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + 
# D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u24) * (S1.u24) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). + # // XYZ coordinate is given in (S0.f, S1.f, S2.f). + # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # if S2.f32 < 0.0F then + # D0.f32 = 5.0F + # else + # D0.f32 = 4.0F + # endif + # elsif abs(S1.f32) >= abs(S0.f32) then + # if S1.f32 < 0.0F then + # D0.f32 = 3.0F + # else + # D0.f32 = 2.0F + # endif + # else + # if S0.f32 < 0.0F then + # D0.f32 = 1.0F + # else + # D0.f32 = 0.0F + # endif + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + if S2.f32 < 0.0: + D0.f32 = 5.0 + else: + D0.f32 = 4.0 + elif abs(S1.f32) >= abs(S0.f32): + if S1.f32 < 0.0: + D0.f32 = 3.0 + else: + D0.f32 = 2.0 + else: + if S0.f32 < 0.0: + D0.f32 = 1.0 + else: + D0.f32 = 0.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // D0.f = cubemap S coordinate. + # // XYZ coordinate is given in (S0.f, S1.f, S2.f). + # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # if S2.f32 < 0.0F then + # D0.f32 = -S0.f32 + # else + # D0.f32 = S0.f32 + # endif + # elsif abs(S1.f32) >= abs(S0.f32) then + # D0.f32 = S0.f32 + # else + # if S0.f32 < 0.0F then + # D0.f32 = S2.f32 + # else + # D0.f32 = -S2.f32 + # endif + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + if S2.f32 < 0.0: + D0.f32 = -S0.f32 + else: + D0.f32 = S0.f32 + elif abs(S1.f32) >= abs(S0.f32): + D0.f32 = S0.f32 + else: + if S0.f32 < 0.0: + D0.f32 = S2.f32 + else: + D0.f32 = -S2.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // D0.f = cubemap T coordinate. + # // XYZ coordinate is given in (S0.f, S1.f, S2.f). + # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # D0.f32 = -S1.f32 + # elsif abs(S1.f32) >= abs(S0.f32) then + # if S1.f32 < 0.0F then + # D0.f32 = -S2.f32 + # else + # D0.f32 = S2.f32 + # endif + # else + # D0.f32 = -S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + D0.f32 = -S1.f32 + elif abs(S1.f32) >= abs(S0.f32): + if S1.f32 < 0.0: + D0.f32 = -S2.f32 + else: + D0.f32 = S2.f32 + else: + D0.f32 = -S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // D0.f = 2.0 * cubemap major axis. + # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
+ # // S0.f = x + # // S1.f = y + # // S2.f = z + # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then + # D0.f32 = S2.f32 * 2.0F + # elsif abs(S1.f32) >= abs(S0.f32) then + # D0.f32 = S1.f32 * 2.0F + # else + # D0.f32 = S0.f32 * 2.0F + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): + D0.f32 = S2.f32 * 2.0 + elif abs(S1.f32) >= abs(S0.f32): + D0.f32 = S1.f32 * 2.0 + else: + D0.f32 = S0.f32 * 2.0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); + # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) + D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = fma(S0.f32, S1.f32, S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = fma(S0.f64, S1.f64, S2.f64) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = fma(S0.f64, S1.f64, S2.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); + # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); + # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); + # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) + tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) + tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) + 
tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0]) & 0xffffffff) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0] * 8)) & 0xffffffff) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || + # isNAN(64'F(S2.f32))) then + # D0.f32 = -MAX_FLOAT_F32 + # else + # D0.f32 = S0.f32 * S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if ((S1.f32 == -MAX_FLOAT_F32) or (F(S1.f32) == (-INF)) or isNAN(F(S1.f32)) or (S2.f32 <= 0.0) or isNAN(F(S2.f32))): + D0.f32 = -MAX_FLOAT_F32 + else: + D0.f32 = S0.f32 * S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then + 
# D0.i32 = v_max_i32(S1.i32, S2.i32) + # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then + # D0.i32 = v_max_i32(S0.i32, S2.i32) + # else + # D0.i32 = v_max_i32(S0.i32, S1.i32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: + D0.i32 = v_max_i32(S1.i32, S2.i32) + elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: + D0.i32 = v_max_i32(S0.i32, S2.i32) + else: + D0.i32 = v_max_i32(S0.i32, S1.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then + # D0.u32 = v_max_u32(S1.u32, S2.u32) + # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then + # D0.u32 = v_max_u32(S0.u32, S2.u32) + # else + # D0.u32 = v_max_u32(S0.u32, S1.u32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: + D0.u32 = v_max_u32(S1.u32, S2.u32) + elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: + D0.u32 = v_max_u32(S0.u32, S2.u32) + else: + D0.u32 = v_max_u32(S0.u32, S1.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # tmp = S2.u32; + # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); + # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); + # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); + # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) + tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) + tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) + tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # tmp = S2.u32; + # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); + # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) + tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); + # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = 
Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) + tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # sign_out = (sign(S1.f32) ^ sign(S2.f32)); + # if isNAN(64'F(S2.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) + # elsif isNAN(64'F(S1.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then + # // 0/0 + # D0.f32 = 32'F(0xffc00000) + # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then + # // inf/inf + # D0.f32 = 32'F(0xffc00000) + # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then + # // x/0, or inf/y + # D0.f32 = sign_out ? -INF.f32 : +INF.f32 + # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then + # // x/inf, 0/y + # D0.f32 = sign_out ? -0.0F : 0.0F + # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then + # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32 + # elsif exponent(S1.f32) == 255 then + # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32 + # else + # D0.f32 = sign_out ? -abs(S0.f32) : abs(S0.f32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + sign_out = (sign(S1.f32) ^ sign(S2.f32)) + if isNAN(F(S2.f32)): + D0.f32 = F(cvtToQuietNAN(F(S2.f32))) + elif isNAN(F(S1.f32)): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif ((F(S1.f32) == 0.0) and (F(S2.f32) == 0.0)): + D0.f32 = F(0xffc00000) + elif ((F(abs(S1.f32)) == INF) and (F(abs(S2.f32)) == INF)): + D0.f32 = F(0xffc00000) + elif ((F(S1.f32) == 0.0) or (F(abs(S2.f32)) == INF)): + D0.f32 = (((-INF).f32) if (sign_out) else (INF.f32)) + elif ((F(abs(S1.f32)) == INF) or (F(S2.f32) == 0.0)): + D0.f32 = ((-0.0) if (sign_out) else (0.0)) + elif exponent(S2.f32) - exponent(S1.f32) < -150: + D0.f32 = ((-UNDERFLOW_F32) if (sign_out) else (UNDERFLOW_F32)) + elif exponent(S1.f32) == 255: + D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) + else: + D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # sign_out = (sign(S1.f64) ^ sign(S2.f64)); + # if isNAN(S2.f64) then + # D0.f64 = cvtToQuietNAN(S2.f64) + # elsif isNAN(S1.f64) then + # D0.f64 = cvtToQuietNAN(S1.f64) + # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then + # // 0/0 + # D0.f64 = 64'F(0xfff8000000000000LL) + # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then + # // inf/inf + # D0.f64 = 64'F(0xfff8000000000000LL) + # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then + # // x/0, or inf/y + # D0.f64 = sign_out ? -INF : +INF + # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then + # // x/inf, 0/y + # D0.f64 = sign_out ? -0.0 : 0.0 + # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then + # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64 + # elsif exponent(S1.f64) == 2047 then + # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64 + # else + # D0.f64 = sign_out ? 
-abs(S0.f64) : abs(S0.f64) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + sign_out = (sign(S1.f64) ^ sign(S2.f64)) + if isNAN(S2.f64): + D0.f64 = cvtToQuietNAN(S2.f64) + elif isNAN(S1.f64): + D0.f64 = cvtToQuietNAN(S1.f64) + elif ((S1.f64 == 0.0) and (S2.f64 == 0.0)): + D0.f64 = F(0xfff8000000000000) + elif ((abs(S1.f64) == INF) and (abs(S2.f64) == INF)): + D0.f64 = F(0xfff8000000000000) + elif ((S1.f64 == 0.0) or (abs(S2.f64) == INF)): + D0.f64 = (((-INF)) if (sign_out) else (INF)) + elif ((abs(S1.f64) == INF) or (S2.f64 == 0.0)): + D0.f64 = ((-0.0) if (sign_out) else (0.0)) + elif exponent(S2.f64) - exponent(S1.f64) < -1075: + D0.f64 = ((-UNDERFLOW_F64) if (sign_out) else (UNDERFLOW_F64)) + elif exponent(S1.f64) == 2047: + D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) + else: + D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 
v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then + # D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32) + # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then + # D0.f32 = v_max_num_f32(S1.f32, S2.f32) + # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then + # D0.f32 = v_max_num_f32(S0.f32, S2.f32) + # else + # D0.f32 = v_max_num_f32(S0.f32, S1.f32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))): + D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32) + elif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32: + D0.f32 = v_max_num_f32(S1.f32, S2.f32) + elif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S1.f32: + D0.f32 = v_max_num_f32(S0.f32, S2.f32) + else: + D0.f32 = v_max_num_f32(S0.f32, S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then + # D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16) + # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then + # D0.f16 = v_max_num_f16(S1.f16, S2.f16) + # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then + # D0.f16 = v_max_num_f16(S0.f16, S2.f16) + # else + # D0.f16 = v_max_num_f16(S0.f16, S1.f16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): + D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16) + elif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16: + D0.f16 = v_max_num_f16(S1.f16, S2.f16) + elif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S1.f16: + D0.f16 = v_max_num_f16(S0.f16, S2.f16) + else: + D0.f16 = v_max_num_f16(S0.f16, S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if VCC.u64[laneId] then + # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) + # else + # D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if VCC.u64[laneId]: + D0.f32 = (2.0 ** 
64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) + else: + D0.f32 = fma(S0.f32, S1.f32, S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if VCC.u64[laneId] then + # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) + # else + # D0.f64 = fma(S0.f64, S1.f64, S2.f64) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if VCC.u64[laneId]: + D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) + else: + D0.f64 = fma(S0.f64, S1.f64, S2.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # // UNSIGNED comparison + # tmp = S2.u32; + # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); + # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); + # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); + # tmp += S1.u32[31 : 24] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) + tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) + tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) + tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 * S1.u16 + S2.u16 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 * S1.u16 + S2.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 + # 
--- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = fma(S0.f16, S1.f16, S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = fma(S0.f16, S1.f16, S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then + # D0.i16 = v_max_i16(S1.i16, S2.i16) + # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then + # D0.i16 = v_max_i16(S0.i16, S2.i16) + # else + # D0.i16 = v_max_i16(S0.i16, S1.i16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: + D0.i16 = v_max_i16(S1.i16, S2.i16) + elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: + D0.i16 = v_max_i16(S0.i16, S2.i16) + else: + D0.i16 = v_max_i16(S0.i16, S1.i16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then + # D0.u16 = v_max_u16(S1.u16, S2.u16) + # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then + # D0.u16 = 
v_max_u16(S0.u16, S2.u16) + # else + # D0.u16 = v_max_u16(S0.u16, S1.u16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: + D0.u16 = v_max_u16(S1.u16, S2.u16) + elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: + D0.u16 = v_max_u16(S0.u16, S2.u16) + else: + D0.u16 = v_max_u16(S0.u16, S1.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 * S1.i16 + S2.i16 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = S0.i16 * S1.i16 + S2.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # sign_out = (sign(S1.f16) ^ sign(S2.f16)); + # if isNAN(64'F(S2.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) + # elsif isNAN(64'F(S1.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then + # // 0/0 + # D0.f16 = 16'F(0xfe00) + # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then + # // inf/inf + # D0.f16 = 16'F(0xfe00) + # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then + # // x/0, or inf/y + # D0.f16 = sign_out ? -INF.f16 : +INF.f16 + # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then + # // x/inf, 0/y + # D0.f16 = sign_out ? -16'0.0 : 16'0.0 + # else + # D0.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + sign_out = (sign(S1.f16) ^ sign(S2.f16)) + if isNAN(F(S2.f16)): + D0.f16 = F(cvtToQuietNAN(F(S2.f16))) + elif isNAN(F(S1.f16)): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif ((F(S1.f16) == 0.0) and (F(S2.f16) == 0.0)): + D0.f16 = F(0xfe00) + elif ((F(abs(S1.f16)) == INF) and (F(abs(S2.f16)) == INF)): + D0.f16 = F(0xfe00) + elif ((F(S1.f16) == 0.0) or (F(abs(S2.f16)) == INF)): + D0.f16 = (((-INF).f16) if (sign_out) else (INF.f16)) + elif ((F(abs(S1.f16)) == INF) or (F(S2.f16) == 0.0)): + D0.f16 = ((-0.0) if (sign_out) else (0.0)) + else: + D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 + S1.u32 + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 + S1.u32 + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = 
((S0.u32 & S1.u32) | S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (S0.u32 | S1.u32 | S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u32 | S1.u32 | S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (S0.u16) * (S1.u16) + S2.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (S0.i16) * (S1.i16) + S2.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u16 = ((S1.u16) if (VCC.u64[laneId]) else (S0.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.f16; + # tmp += S0[15 : 0].f16 * 
S1[15 : 0].f16; + # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; + # D0.f16 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.f16) + tmp += S0[15 : 0].f16 * S1[15 : 0].f16 + tmp += S0[31 : 16].f16 * S1[31 : 16].f16 + D0.f16 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINMAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 
v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = pow(2.0F, S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = pow(2.0, S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = pow(16'2.0, S0.f16); + # D0[31 : 16] = 16'0x0 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = pow(2.0, S0.f16) + D0[31 : 16] = 0x0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = log2(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = log2(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = log2(S0.f16); + # D0[31 : 16] = 16'0x0 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = log2(S0.f16) + D0[31 : 16] = 0x0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / S0.f32 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / S0.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / S0.f16; + # D0[31 : 16] = 16'0x0 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / S0.f16 + D0[31 : 16] = 0x0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = 1.0F / sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = 1.0 / sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = 16'1.0 / sqrt(S0.f16); + # D0[31 : 16] = 16'0x0 + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = 1.0 / sqrt(S0.f16) + D0[31 : 16] = 0x0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = sqrt(S0.f32) + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = sqrt(S0.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f16 = sqrt(S0.f16); + # D0[31 : 16] = 16'0x0 + S0 = Reg(s0) + 
D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f16 = sqrt(S0.f16) + D0[31 : 16] = 0x0 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 + S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 + S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 - S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 - S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 * S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = S0.u16 * S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); + # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16] = (v_cvt_i16_f32(S1.f32)) + tmp[15 : 0] = (v_cvt_i16_f32(S0.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); + # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16] = (v_cvt_u16_f32(S1.f32)) + tmp[15 : 0] = (v_cvt_u16_f32(S0.f32)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 < S1.i16 ? 
S0.i16 : S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 + S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = S0.i16 + S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = S0.i16 - S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = S0.i16 - S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0[31 : 16].f16 = S1.f16; + # D0[15 : 0].f16 = S0.f16 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0[31 : 16].f16 = S1.f16 + D0[15 : 0].f16 = S0.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); + # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = f16_to_snorm(S0.f16) + tmp[31 : 16].i16 = f16_to_snorm(S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); + # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = f16_to_unorm(S0.f16) + tmp[31 : 16].u16 = f16_to_unorm(S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f32 = S0.f32 * 2.0F ** S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f32 = S0.f32 * 2.0 ** S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32; + # for i in 0 : 31 do + # tmp += S0[i].u32; + # // count i'th bit + # endfor; + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S1.u32) + for i in range(0, int(31)+1): + tmp += S0[i].u32 + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); + # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = f32_to_snorm(S0.f32) + tmp[31 : 16].i16 = f32_to_snorm(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); + # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = f32_to_unorm(S0.f32) + tmp[31 : 16].u16 = f32_to_unorm(S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = u32_to_u16(S0.u32); + # tmp[31 : 16].u16 = u32_to_u16(S1.u32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = u32_to_u16(S0.u32) + tmp[31 : 16].u16 = u32_to_u16(S1.u32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = i32_to_i16(S0.i32); + # tmp[31 : 16].i16 = i32_to_i16(S1.i32); + S0 = Reg(s0) + S1 = Reg(s1) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = i32_to_i16(S0.i32) + tmp[31 : 16].i16 = i32_to_i16(S1.i32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + return result + +def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 - S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = S0.i32 - S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = S0.i32 + S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = S0.i32 + S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.f64 = S0.f64 * 2.0 ** S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.f64 = S0.f64 * 2.0 ** S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = S0.u32 * S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = S0.u32 * S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u32 = (((S0.u32) * (S1.u32)) >> 32) + # --- end 
pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i32 = (((S0.i32) * (S1.i32)) >> 32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S1.u16 << S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S1.u16 << S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S1.u16 >> S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S1.u16 >> S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i16 = (S1.i16 >> S0[3 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i16 = (S1.i16 >> S0[3 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64 = (S1.u64 >> S0[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u64 = (S1.u64 >> S0[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.i64 = (S1.i64 >> S0[5 : 0].u32) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.i64 = (S1.i64 >> S0[5 : 0].u32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(S0.f64) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif isSignalNAN(S1.f64) then + # D0.f64 = cvtToQuietNAN(S1.f64) + # elsif isQuietNAN(S0.f64) then + # D0.f64 = S0.f64 + # elsif isQuietNAN(S1.f64) then + # D0.f64 = S1.f64 + # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && + # !sign(S1.f64))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f64 = S0.f64 + # else + # D0.f64 = S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): + TRAPSTS.INVALID = 1 + if isSignalNAN(S0.f64): + D0.f64 = cvtToQuietNAN(S0.f64) + elif isSignalNAN(S1.f64): + D0.f64 = cvtToQuietNAN(S1.f64) + elif isQuietNAN(S0.f64): + D0.f64 = S0.f64 + elif isQuietNAN(S1.f64): + D0.f64 = S1.f64 + elif ((S0.f64 < S1.f64) or ((abs(S0.f64) == 0.0) and (abs(S1.f64) == 0.0) and sign(S0.f64) and not sign(S1.f64))): + D0.f64 = S0.f64 + else: + D0.f64 = S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 
'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(S0.f64) then + # D0.f64 = cvtToQuietNAN(S0.f64) + # elsif isSignalNAN(S1.f64) then + # D0.f64 = cvtToQuietNAN(S1.f64) + # elsif isQuietNAN(S0.f64) then + # D0.f64 = S0.f64 + # elsif isQuietNAN(S1.f64) then + # D0.f64 = S1.f64 + # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && + # sign(S1.f64))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f64 = S0.f64 + # else + # D0.f64 = S1.f64 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): + TRAPSTS.INVALID = 1 + if isSignalNAN(S0.f64): + D0.f64 = cvtToQuietNAN(S0.f64) + elif isSignalNAN(S1.f64): + D0.f64 = cvtToQuietNAN(S1.f64) + elif isQuietNAN(S0.f64): + D0.f64 = S0.f64 + elif isQuietNAN(S1.f64): + D0.f64 = S1.f64 + elif ((S0.f64 > S1.f64) or ((abs(S0.f64) == 0.0) and (abs(S1.f64) == 0.0) and not sign(S0.f64) and sign(S1.f64))): + D0.f64 = S0.f64 + else: + D0.f64 = S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + +def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare lane : 32'U; + # if WAVE32 then + # lane = S1.u32[4 : 0].u32; + # // Lane select for wave32 + # else + # lane = S1.u32[5 : 0].u32; + # // Lane select for wave64 + # endif; + # D0.b32 = VGPR[lane][SRC0.u32] + S1 = Reg(s1) + D0 = Reg(d0) + SRC0 = Reg(src0_idx) + # --- compiled pseudocode --- + if WAVE32: + lane = S1.u32[4 : 0].u32 + else: + lane = S1.u32[5 : 0].u32 + D0.b32 = VGPR[lane][SRC0.u32] + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S0.u16 & S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S0.u16 & S1.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S0.u16 | S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S0.u16 | S1.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u16 = (S0.u16 ^ S1.u16) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.u16 = (S0.u16 ^ S1.u16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isSignalNAN(64'F(S1.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isQuietNAN(64'F(S0.f32)) then + # D0.f32 = S0.f32 + # elsif isQuietNAN(64'F(S1.f32)) then + # D0.f32 = S1.f32 + # elsif ((S0.f32 < S1.f32) || 
((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && + # !sign(S1.f32))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f32)): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isSignalNAN(F(S1.f32)): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isQuietNAN(F(S0.f32)): + D0.f32 = S0.f32 + elif isQuietNAN(F(S1.f32)): + D0.f32 = S1.f32 + elif ((S0.f32 < S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and sign(S0.f32) and not sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) + # elsif isSignalNAN(64'F(S1.f32)) then + # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) + # elsif isQuietNAN(64'F(S0.f32)) then + # D0.f32 = S0.f32 + # elsif isQuietNAN(64'F(S1.f32)) then + # D0.f32 = S1.f32 + # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && + # sign(S1.f32))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f32 = S0.f32 + # else + # D0.f32 = S1.f32 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f32)): + D0.f32 = F(cvtToQuietNAN(F(S0.f32))) + elif isSignalNAN(F(S1.f32)): + D0.f32 = F(cvtToQuietNAN(F(S1.f32))) + elif isQuietNAN(F(S0.f32)): + D0.f32 = S0.f32 + elif isQuietNAN(F(S1.f32)): + D0.f32 = S1.f32 + elif ((S0.f32 > S1.f32) or ((abs(S0.f32) == 0.0) and (abs(S1.f32) == 0.0) and not sign(S0.f32) and sign(S1.f32))): + D0.f32 = S0.f32 + else: + D0.f32 = S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isSignalNAN(64'F(S1.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isQuietNAN(64'F(S0.f16)) then + # D0.f16 = S0.f16 + # elsif isQuietNAN(64'F(S1.f16)) then + # D0.f16 = S1.f16 + # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && + # !sign(S1.f16))) then + # // NOTE: -0<+0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f16)): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isSignalNAN(F(S1.f16)): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isQuietNAN(F(S0.f16)): + D0.f16 = S0.f16 + elif isQuietNAN(F(S1.f16)): + D0.f16 = S1.f16 + elif ((S0.f16 < S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and sign(S0.f16) and not sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = 
{'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then + # TRAPSTS.INVALID = 1 + # endif; + # if isSignalNAN(64'F(S0.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) + # elsif isSignalNAN(64'F(S1.f16)) then + # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) + # elsif isQuietNAN(64'F(S0.f16)) then + # D0.f16 = S0.f16 + # elsif isQuietNAN(64'F(S1.f16)) then + # D0.f16 = S1.f16 + # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && + # sign(S1.f16))) then + # // NOTE: +0>-0 is TRUE in this comparison + # D0.f16 = S0.f16 + # else + # D0.f16 = S1.f16 + # endif + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): + TRAPSTS.INVALID = 1 + if isSignalNAN(F(S0.f16)): + D0.f16 = F(cvtToQuietNAN(F(S0.f16))) + elif isSignalNAN(F(S1.f16)): + D0.f16 = F(cvtToQuietNAN(F(S1.f16))) + elif isQuietNAN(F(S0.f16)): + D0.f16 = S0.f16 + elif isQuietNAN(F(S1.f16)): + D0.f16 = S1.f16 + elif ((S0.f16 > S1.f16) or ((abs(S0.f16) == 0.0) and (abs(S1.f16) == 0.0) and not sign(S0.f16) and sign(S1.f16))): + D0.f16 = S0.f16 + else: + D0.f16 = S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +VOP3Op_FUNCTIONS = { + VOP3Op.V_MOV_B32: _VOP3Op_V_MOV_B32, + VOP3Op.V_READFIRSTLANE_B32: _VOP3Op_V_READFIRSTLANE_B32, + VOP3Op.V_CVT_I32_F64: _VOP3Op_V_CVT_I32_F64, + VOP3Op.V_CVT_F64_I32: _VOP3Op_V_CVT_F64_I32, + VOP3Op.V_CVT_F32_I32: _VOP3Op_V_CVT_F32_I32, + VOP3Op.V_CVT_F32_U32: _VOP3Op_V_CVT_F32_U32, + VOP3Op.V_CVT_U32_F32: _VOP3Op_V_CVT_U32_F32, + VOP3Op.V_CVT_I32_F32: _VOP3Op_V_CVT_I32_F32, + VOP3Op.V_CVT_F16_F32: _VOP3Op_V_CVT_F16_F32, + VOP3Op.V_CVT_F32_F16: _VOP3Op_V_CVT_F32_F16, + VOP3Op.V_CVT_NEAREST_I32_F32: _VOP3Op_V_CVT_NEAREST_I32_F32, + VOP3Op.V_CVT_FLOOR_I32_F32: _VOP3Op_V_CVT_FLOOR_I32_F32, + VOP3Op.V_CVT_F32_F64: _VOP3Op_V_CVT_F32_F64, + VOP3Op.V_CVT_F64_F32: _VOP3Op_V_CVT_F64_F32, + VOP3Op.V_CVT_F32_UBYTE0: _VOP3Op_V_CVT_F32_UBYTE0, + VOP3Op.V_CVT_F32_UBYTE1: _VOP3Op_V_CVT_F32_UBYTE1, + VOP3Op.V_CVT_F32_UBYTE2: _VOP3Op_V_CVT_F32_UBYTE2, + VOP3Op.V_CVT_F32_UBYTE3: _VOP3Op_V_CVT_F32_UBYTE3, + VOP3Op.V_CVT_U32_F64: _VOP3Op_V_CVT_U32_F64, + VOP3Op.V_CVT_F64_U32: _VOP3Op_V_CVT_F64_U32, + VOP3Op.V_TRUNC_F64: _VOP3Op_V_TRUNC_F64, + VOP3Op.V_CEIL_F64: _VOP3Op_V_CEIL_F64, + VOP3Op.V_RNDNE_F64: _VOP3Op_V_RNDNE_F64, + VOP3Op.V_FLOOR_F64: _VOP3Op_V_FLOOR_F64, + VOP3Op.V_MOV_B16: _VOP3Op_V_MOV_B16, + VOP3Op.V_FRACT_F32: _VOP3Op_V_FRACT_F32, + VOP3Op.V_TRUNC_F32: _VOP3Op_V_TRUNC_F32, + VOP3Op.V_CEIL_F32: _VOP3Op_V_CEIL_F32, + VOP3Op.V_RNDNE_F32: _VOP3Op_V_RNDNE_F32, + VOP3Op.V_FLOOR_F32: _VOP3Op_V_FLOOR_F32, + VOP3Op.V_EXP_F32: _VOP3Op_V_EXP_F32, + VOP3Op.V_LOG_F32: _VOP3Op_V_LOG_F32, + VOP3Op.V_RCP_F32: _VOP3Op_V_RCP_F32, + VOP3Op.V_RCP_IFLAG_F32: _VOP3Op_V_RCP_IFLAG_F32, + VOP3Op.V_RSQ_F32: _VOP3Op_V_RSQ_F32, + VOP3Op.V_RCP_F64: _VOP3Op_V_RCP_F64, + VOP3Op.V_RSQ_F64: _VOP3Op_V_RSQ_F64, + VOP3Op.V_SQRT_F32: _VOP3Op_V_SQRT_F32, + VOP3Op.V_SQRT_F64: _VOP3Op_V_SQRT_F64, + VOP3Op.V_SIN_F32: _VOP3Op_V_SIN_F32, + VOP3Op.V_COS_F32: _VOP3Op_V_COS_F32, + VOP3Op.V_NOT_B32: _VOP3Op_V_NOT_B32, + VOP3Op.V_BFREV_B32: _VOP3Op_V_BFREV_B32, + VOP3Op.V_CLZ_I32_U32: _VOP3Op_V_CLZ_I32_U32, + VOP3Op.V_CTZ_I32_B32: _VOP3Op_V_CTZ_I32_B32, + VOP3Op.V_CLS_I32: _VOP3Op_V_CLS_I32, + 
VOP3Op.V_FREXP_EXP_I32_F64: _VOP3Op_V_FREXP_EXP_I32_F64, + VOP3Op.V_FREXP_MANT_F64: _VOP3Op_V_FREXP_MANT_F64, + VOP3Op.V_FRACT_F64: _VOP3Op_V_FRACT_F64, + VOP3Op.V_FREXP_EXP_I32_F32: _VOP3Op_V_FREXP_EXP_I32_F32, + VOP3Op.V_FREXP_MANT_F32: _VOP3Op_V_FREXP_MANT_F32, + VOP3Op.V_MOVRELS_B32: _VOP3Op_V_MOVRELS_B32, + VOP3Op.V_CVT_F16_U16: _VOP3Op_V_CVT_F16_U16, + VOP3Op.V_CVT_F16_I16: _VOP3Op_V_CVT_F16_I16, + VOP3Op.V_CVT_U16_F16: _VOP3Op_V_CVT_U16_F16, + VOP3Op.V_CVT_I16_F16: _VOP3Op_V_CVT_I16_F16, + VOP3Op.V_RCP_F16: _VOP3Op_V_RCP_F16, + VOP3Op.V_SQRT_F16: _VOP3Op_V_SQRT_F16, + VOP3Op.V_RSQ_F16: _VOP3Op_V_RSQ_F16, + VOP3Op.V_LOG_F16: _VOP3Op_V_LOG_F16, + VOP3Op.V_EXP_F16: _VOP3Op_V_EXP_F16, + VOP3Op.V_FREXP_MANT_F16: _VOP3Op_V_FREXP_MANT_F16, + VOP3Op.V_FREXP_EXP_I16_F16: _VOP3Op_V_FREXP_EXP_I16_F16, + VOP3Op.V_FLOOR_F16: _VOP3Op_V_FLOOR_F16, + VOP3Op.V_CEIL_F16: _VOP3Op_V_CEIL_F16, + VOP3Op.V_TRUNC_F16: _VOP3Op_V_TRUNC_F16, + VOP3Op.V_RNDNE_F16: _VOP3Op_V_RNDNE_F16, + VOP3Op.V_FRACT_F16: _VOP3Op_V_FRACT_F16, + VOP3Op.V_SIN_F16: _VOP3Op_V_SIN_F16, + VOP3Op.V_COS_F16: _VOP3Op_V_COS_F16, + VOP3Op.V_SAT_PK_U8_I16: _VOP3Op_V_SAT_PK_U8_I16, + VOP3Op.V_CVT_NORM_I16_F16: _VOP3Op_V_CVT_NORM_I16_F16, + VOP3Op.V_CVT_NORM_U16_F16: _VOP3Op_V_CVT_NORM_U16_F16, + VOP3Op.V_NOT_B16: _VOP3Op_V_NOT_B16, + VOP3Op.V_CVT_I32_I16: _VOP3Op_V_CVT_I32_I16, + VOP3Op.V_CVT_U32_U16: _VOP3Op_V_CVT_U32_U16, + VOP3Op.V_CVT_F32_FP8: _VOP3Op_V_CVT_F32_FP8, + VOP3Op.V_CVT_F32_BF8: _VOP3Op_V_CVT_F32_BF8, + VOP3Op.V_CVT_PK_F32_FP8: _VOP3Op_V_CVT_PK_F32_FP8, + VOP3Op.V_CVT_PK_F32_BF8: _VOP3Op_V_CVT_PK_F32_BF8, + VOP3Op.V_CNDMASK_B32: _VOP3Op_V_CNDMASK_B32, + VOP3Op.V_ADD_F64: _VOP3Op_V_ADD_F64, + VOP3Op.V_ADD_F32: _VOP3Op_V_ADD_F32, + VOP3Op.V_SUB_F32: _VOP3Op_V_SUB_F32, + VOP3Op.V_SUBREV_F32: _VOP3Op_V_SUBREV_F32, + VOP3Op.V_MUL_F64: _VOP3Op_V_MUL_F64, + VOP3Op.V_MUL_DX9_ZERO_F32: _VOP3Op_V_MUL_DX9_ZERO_F32, + VOP3Op.V_MUL_F32: _VOP3Op_V_MUL_F32, + VOP3Op.V_MUL_I32_I24: _VOP3Op_V_MUL_I32_I24, + VOP3Op.V_MUL_HI_I32_I24: _VOP3Op_V_MUL_HI_I32_I24, + VOP3Op.V_MUL_U32_U24: _VOP3Op_V_MUL_U32_U24, + VOP3Op.V_MUL_HI_U32_U24: _VOP3Op_V_MUL_HI_U32_U24, + VOP3Op.V_MIN_NUM_F64: _VOP3Op_V_MIN_NUM_F64, + VOP3Op.V_MAX_NUM_F64: _VOP3Op_V_MAX_NUM_F64, + VOP3Op.V_MIN_I32: _VOP3Op_V_MIN_I32, + VOP3Op.V_MAX_I32: _VOP3Op_V_MAX_I32, + VOP3Op.V_MIN_U32: _VOP3Op_V_MIN_U32, + VOP3Op.V_MAX_U32: _VOP3Op_V_MAX_U32, + VOP3Op.V_MIN_NUM_F32: _VOP3Op_V_MIN_NUM_F32, + VOP3Op.V_MAX_NUM_F32: _VOP3Op_V_MAX_NUM_F32, + VOP3Op.V_LSHLREV_B32: _VOP3Op_V_LSHLREV_B32, + VOP3Op.V_LSHRREV_B32: _VOP3Op_V_LSHRREV_B32, + VOP3Op.V_ASHRREV_I32: _VOP3Op_V_ASHRREV_I32, + VOP3Op.V_AND_B32: _VOP3Op_V_AND_B32, + VOP3Op.V_OR_B32: _VOP3Op_V_OR_B32, + VOP3Op.V_XOR_B32: _VOP3Op_V_XOR_B32, + VOP3Op.V_XNOR_B32: _VOP3Op_V_XNOR_B32, + VOP3Op.V_LSHLREV_B64: _VOP3Op_V_LSHLREV_B64, + VOP3Op.V_ADD_NC_U32: _VOP3Op_V_ADD_NC_U32, + VOP3Op.V_SUB_NC_U32: _VOP3Op_V_SUB_NC_U32, + VOP3Op.V_SUBREV_NC_U32: _VOP3Op_V_SUBREV_NC_U32, + VOP3Op.V_FMAC_F32: _VOP3Op_V_FMAC_F32, + VOP3Op.V_CVT_PK_RTZ_F16_F32: _VOP3Op_V_CVT_PK_RTZ_F16_F32, + VOP3Op.V_MIN_NUM_F16: _VOP3Op_V_MIN_NUM_F16, + VOP3Op.V_MAX_NUM_F16: _VOP3Op_V_MAX_NUM_F16, + VOP3Op.V_ADD_F16: _VOP3Op_V_ADD_F16, + VOP3Op.V_SUB_F16: _VOP3Op_V_SUB_F16, + VOP3Op.V_SUBREV_F16: _VOP3Op_V_SUBREV_F16, + VOP3Op.V_MUL_F16: _VOP3Op_V_MUL_F16, + VOP3Op.V_FMAC_F16: _VOP3Op_V_FMAC_F16, + VOP3Op.V_LDEXP_F16: _VOP3Op_V_LDEXP_F16, + VOP3Op.V_FMA_DX9_ZERO_F32: _VOP3Op_V_FMA_DX9_ZERO_F32, + VOP3Op.V_MAD_I32_I24: _VOP3Op_V_MAD_I32_I24, + VOP3Op.V_MAD_U32_U24: 
_VOP3Op_V_MAD_U32_U24, + VOP3Op.V_CUBEID_F32: _VOP3Op_V_CUBEID_F32, + VOP3Op.V_CUBESC_F32: _VOP3Op_V_CUBESC_F32, + VOP3Op.V_CUBETC_F32: _VOP3Op_V_CUBETC_F32, + VOP3Op.V_CUBEMA_F32: _VOP3Op_V_CUBEMA_F32, + VOP3Op.V_BFE_U32: _VOP3Op_V_BFE_U32, + VOP3Op.V_BFE_I32: _VOP3Op_V_BFE_I32, + VOP3Op.V_BFI_B32: _VOP3Op_V_BFI_B32, + VOP3Op.V_FMA_F32: _VOP3Op_V_FMA_F32, + VOP3Op.V_FMA_F64: _VOP3Op_V_FMA_F64, + VOP3Op.V_LERP_U8: _VOP3Op_V_LERP_U8, + VOP3Op.V_ALIGNBIT_B32: _VOP3Op_V_ALIGNBIT_B32, + VOP3Op.V_ALIGNBYTE_B32: _VOP3Op_V_ALIGNBYTE_B32, + VOP3Op.V_MULLIT_F32: _VOP3Op_V_MULLIT_F32, + VOP3Op.V_MIN3_I32: _VOP3Op_V_MIN3_I32, + VOP3Op.V_MIN3_U32: _VOP3Op_V_MIN3_U32, + VOP3Op.V_MAX3_I32: _VOP3Op_V_MAX3_I32, + VOP3Op.V_MAX3_U32: _VOP3Op_V_MAX3_U32, + VOP3Op.V_MED3_I32: _VOP3Op_V_MED3_I32, + VOP3Op.V_MED3_U32: _VOP3Op_V_MED3_U32, + VOP3Op.V_SAD_U8: _VOP3Op_V_SAD_U8, + VOP3Op.V_SAD_U16: _VOP3Op_V_SAD_U16, + VOP3Op.V_SAD_U32: _VOP3Op_V_SAD_U32, + VOP3Op.V_CVT_PK_U8_F32: _VOP3Op_V_CVT_PK_U8_F32, + VOP3Op.V_DIV_FIXUP_F32: _VOP3Op_V_DIV_FIXUP_F32, + VOP3Op.V_DIV_FIXUP_F64: _VOP3Op_V_DIV_FIXUP_F64, + VOP3Op.V_MIN3_NUM_F32: _VOP3Op_V_MIN3_NUM_F32, + VOP3Op.V_MAX3_NUM_F32: _VOP3Op_V_MAX3_NUM_F32, + VOP3Op.V_MIN3_NUM_F16: _VOP3Op_V_MIN3_NUM_F16, + VOP3Op.V_MAX3_NUM_F16: _VOP3Op_V_MAX3_NUM_F16, + VOP3Op.V_MINIMUM3_F32: _VOP3Op_V_MINIMUM3_F32, + VOP3Op.V_MAXIMUM3_F32: _VOP3Op_V_MAXIMUM3_F32, + VOP3Op.V_MINIMUM3_F16: _VOP3Op_V_MINIMUM3_F16, + VOP3Op.V_MAXIMUM3_F16: _VOP3Op_V_MAXIMUM3_F16, + VOP3Op.V_MED3_NUM_F32: _VOP3Op_V_MED3_NUM_F32, + VOP3Op.V_MED3_NUM_F16: _VOP3Op_V_MED3_NUM_F16, + VOP3Op.V_DIV_FMAS_F32: _VOP3Op_V_DIV_FMAS_F32, + VOP3Op.V_DIV_FMAS_F64: _VOP3Op_V_DIV_FMAS_F64, + VOP3Op.V_MSAD_U8: _VOP3Op_V_MSAD_U8, + VOP3Op.V_XOR3_B32: _VOP3Op_V_XOR3_B32, + VOP3Op.V_MAD_U16: _VOP3Op_V_MAD_U16, + VOP3Op.V_XAD_U32: _VOP3Op_V_XAD_U32, + VOP3Op.V_LSHL_ADD_U32: _VOP3Op_V_LSHL_ADD_U32, + VOP3Op.V_ADD_LSHL_U32: _VOP3Op_V_ADD_LSHL_U32, + VOP3Op.V_FMA_F16: _VOP3Op_V_FMA_F16, + VOP3Op.V_MIN3_I16: _VOP3Op_V_MIN3_I16, + VOP3Op.V_MIN3_U16: _VOP3Op_V_MIN3_U16, + VOP3Op.V_MAX3_I16: _VOP3Op_V_MAX3_I16, + VOP3Op.V_MAX3_U16: _VOP3Op_V_MAX3_U16, + VOP3Op.V_MED3_I16: _VOP3Op_V_MED3_I16, + VOP3Op.V_MED3_U16: _VOP3Op_V_MED3_U16, + VOP3Op.V_MAD_I16: _VOP3Op_V_MAD_I16, + VOP3Op.V_DIV_FIXUP_F16: _VOP3Op_V_DIV_FIXUP_F16, + VOP3Op.V_ADD3_U32: _VOP3Op_V_ADD3_U32, + VOP3Op.V_LSHL_OR_B32: _VOP3Op_V_LSHL_OR_B32, + VOP3Op.V_AND_OR_B32: _VOP3Op_V_AND_OR_B32, + VOP3Op.V_OR3_B32: _VOP3Op_V_OR3_B32, + VOP3Op.V_MAD_U32_U16: _VOP3Op_V_MAD_U32_U16, + VOP3Op.V_MAD_I32_I16: _VOP3Op_V_MAD_I32_I16, + VOP3Op.V_CNDMASK_B16: _VOP3Op_V_CNDMASK_B16, + VOP3Op.V_MAXMIN_U32: _VOP3Op_V_MAXMIN_U32, + VOP3Op.V_MINMAX_U32: _VOP3Op_V_MINMAX_U32, + VOP3Op.V_MAXMIN_I32: _VOP3Op_V_MAXMIN_I32, + VOP3Op.V_MINMAX_I32: _VOP3Op_V_MINMAX_I32, + VOP3Op.V_DOT2_F16_F16: _VOP3Op_V_DOT2_F16_F16, + VOP3Op.V_MINMAX_NUM_F32: _VOP3Op_V_MINMAX_NUM_F32, + VOP3Op.V_MAXMIN_NUM_F32: _VOP3Op_V_MAXMIN_NUM_F32, + VOP3Op.V_MINMAX_NUM_F16: _VOP3Op_V_MINMAX_NUM_F16, + VOP3Op.V_MAXMIN_NUM_F16: _VOP3Op_V_MAXMIN_NUM_F16, + VOP3Op.V_MINIMUMMAXIMUM_F32: _VOP3Op_V_MINIMUMMAXIMUM_F32, + VOP3Op.V_MAXIMUMMINIMUM_F32: _VOP3Op_V_MAXIMUMMINIMUM_F32, + VOP3Op.V_MINIMUMMAXIMUM_F16: _VOP3Op_V_MINIMUMMAXIMUM_F16, + VOP3Op.V_MAXIMUMMINIMUM_F16: _VOP3Op_V_MAXIMUMMINIMUM_F16, + VOP3Op.V_S_EXP_F32: _VOP3Op_V_S_EXP_F32, + VOP3Op.V_S_EXP_F16: _VOP3Op_V_S_EXP_F16, + VOP3Op.V_S_LOG_F32: _VOP3Op_V_S_LOG_F32, + VOP3Op.V_S_LOG_F16: _VOP3Op_V_S_LOG_F16, + VOP3Op.V_S_RCP_F32: _VOP3Op_V_S_RCP_F32, + VOP3Op.V_S_RCP_F16: 
_VOP3Op_V_S_RCP_F16, + VOP3Op.V_S_RSQ_F32: _VOP3Op_V_S_RSQ_F32, + VOP3Op.V_S_RSQ_F16: _VOP3Op_V_S_RSQ_F16, + VOP3Op.V_S_SQRT_F32: _VOP3Op_V_S_SQRT_F32, + VOP3Op.V_S_SQRT_F16: _VOP3Op_V_S_SQRT_F16, + VOP3Op.V_ADD_NC_U16: _VOP3Op_V_ADD_NC_U16, + VOP3Op.V_SUB_NC_U16: _VOP3Op_V_SUB_NC_U16, + VOP3Op.V_MUL_LO_U16: _VOP3Op_V_MUL_LO_U16, + VOP3Op.V_CVT_PK_I16_F32: _VOP3Op_V_CVT_PK_I16_F32, + VOP3Op.V_CVT_PK_U16_F32: _VOP3Op_V_CVT_PK_U16_F32, + VOP3Op.V_MAX_U16: _VOP3Op_V_MAX_U16, + VOP3Op.V_MAX_I16: _VOP3Op_V_MAX_I16, + VOP3Op.V_MIN_U16: _VOP3Op_V_MIN_U16, + VOP3Op.V_MIN_I16: _VOP3Op_V_MIN_I16, + VOP3Op.V_ADD_NC_I16: _VOP3Op_V_ADD_NC_I16, + VOP3Op.V_SUB_NC_I16: _VOP3Op_V_SUB_NC_I16, + VOP3Op.V_PACK_B32_F16: _VOP3Op_V_PACK_B32_F16, + VOP3Op.V_CVT_PK_NORM_I16_F16: _VOP3Op_V_CVT_PK_NORM_I16_F16, + VOP3Op.V_CVT_PK_NORM_U16_F16: _VOP3Op_V_CVT_PK_NORM_U16_F16, + VOP3Op.V_LDEXP_F32: _VOP3Op_V_LDEXP_F32, + VOP3Op.V_BFM_B32: _VOP3Op_V_BFM_B32, + VOP3Op.V_BCNT_U32_B32: _VOP3Op_V_BCNT_U32_B32, + VOP3Op.V_CVT_PK_NORM_I16_F32: _VOP3Op_V_CVT_PK_NORM_I16_F32, + VOP3Op.V_CVT_PK_NORM_U16_F32: _VOP3Op_V_CVT_PK_NORM_U16_F32, + VOP3Op.V_CVT_PK_U16_U32: _VOP3Op_V_CVT_PK_U16_U32, + VOP3Op.V_CVT_PK_I16_I32: _VOP3Op_V_CVT_PK_I16_I32, + VOP3Op.V_SUB_NC_I32: _VOP3Op_V_SUB_NC_I32, + VOP3Op.V_ADD_NC_I32: _VOP3Op_V_ADD_NC_I32, + VOP3Op.V_LDEXP_F64: _VOP3Op_V_LDEXP_F64, + VOP3Op.V_MUL_LO_U32: _VOP3Op_V_MUL_LO_U32, + VOP3Op.V_MUL_HI_U32: _VOP3Op_V_MUL_HI_U32, + VOP3Op.V_MUL_HI_I32: _VOP3Op_V_MUL_HI_I32, + VOP3Op.V_LSHLREV_B16: _VOP3Op_V_LSHLREV_B16, + VOP3Op.V_LSHRREV_B16: _VOP3Op_V_LSHRREV_B16, + VOP3Op.V_ASHRREV_I16: _VOP3Op_V_ASHRREV_I16, + VOP3Op.V_LSHRREV_B64: _VOP3Op_V_LSHRREV_B64, + VOP3Op.V_ASHRREV_I64: _VOP3Op_V_ASHRREV_I64, + VOP3Op.V_MINIMUM_F64: _VOP3Op_V_MINIMUM_F64, + VOP3Op.V_MAXIMUM_F64: _VOP3Op_V_MAXIMUM_F64, + VOP3Op.V_READLANE_B32: _VOP3Op_V_READLANE_B32, + VOP3Op.V_AND_B16: _VOP3Op_V_AND_B16, + VOP3Op.V_OR_B16: _VOP3Op_V_OR_B16, + VOP3Op.V_XOR_B16: _VOP3Op_V_XOR_B16, + VOP3Op.V_MINIMUM_F32: _VOP3Op_V_MINIMUM_F32, + VOP3Op.V_MAXIMUM_F32: _VOP3Op_V_MAXIMUM_F32, + VOP3Op.V_MINIMUM_F16: _VOP3Op_V_MINIMUM_F16, + VOP3Op.V_MAXIMUM_F16: _VOP3Op_V_MAXIMUM_F16, +} + +def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; + # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
+ # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; + # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) + VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC = 0x0LL; + # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then + # D0.f32 = NAN.f32 + # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then + # // N/D near MAX_FLOAT_F32 + # VCC = 0x1LL; + # if S0.f32 == S1.f32 then + # // Only scale the denominator + # D0.f32 = ldexp(S0.f32, 64) + # endif + # elsif S1.f32 == DENORM.f32 then + # D0.f32 = ldexp(S0.f32, 64) + # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then + # VCC = 0x1LL; + # if S0.f32 == S1.f32 then + # // Only scale the denominator + # D0.f32 = ldexp(S0.f32, 64) + # endif + # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then + # D0.f32 = ldexp(S0.f32, -64) + # elsif S2.f32 / S1.f32 == DENORM.f32 then + # VCC = 0x1LL; + # if S0.f32 == S2.f32 then + # // Only scale the numerator + # D0.f32 = ldexp(S0.f32, 64) + # endif + # elsif exponent(S2.f32) <= 23 then + # // Numerator is tiny + # D0.f32 = ldexp(S0.f32, 64) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(s0) + VCC = Reg(vcc) + # --- compiled pseudocode --- + VCC = Reg(0x0) + if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): + VCC = Reg(0x1); D0.f32 = float("nan") + elif exponent(S2.f32) - exponent(S1.f32) >= 96: + VCC = Reg(0x1) + if S0.f32 == S1.f32: + D0.f32 = ldexp(S0.f32, 64) + elif False: + pass # denorm check moved to end + elif ((1.0 / F(S1.f32) == DENORM.f64) and (S2.f32 / S1.f32 == DENORM.f32)): + VCC = Reg(0x1) + if S0.f32 == S1.f32: + D0.f32 = ldexp(S0.f32, 64) + elif 1.0 / F(S1.f32) == DENORM.f64: + D0.f32 = ldexp(S0.f32, -64) + elif S2.f32 / S1.f32 == DENORM.f32: + VCC = Reg(0x1) + elif exponent(S2.f32) <= 23: + VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) + if S1.f32 == DENORM.f32: + D0.f32 = float("nan") + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC = 0x0LL; + # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then + # D0.f64 = NAN.f64 + # elsif exponent(S2.f64) - exponent(S1.f64) >= 768 then + # // N/D near MAX_FLOAT_F64 + # VCC = 0x1LL; + # if S0.f64 == S1.f64 then + # // Only scale the denominator + # D0.f64 = ldexp(S0.f64, 128) + # 
endif + # elsif S1.f64 == DENORM.f64 then + # D0.f64 = ldexp(S0.f64, 128) + # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then + # VCC = 0x1LL; + # if S0.f64 == S1.f64 then + # // Only scale the denominator + # D0.f64 = ldexp(S0.f64, 128) + # endif + # elsif 1.0 / S1.f64 == DENORM.f64 then + # D0.f64 = ldexp(S0.f64, -128) + # elsif S2.f64 / S1.f64 == DENORM.f64 then + # VCC = 0x1LL; + # if S0.f64 == S2.f64 then + # // Only scale the numerator + # D0.f64 = ldexp(S0.f64, 128) + # endif + # elsif exponent(S2.f64) <= 53 then + # // Numerator is tiny + # D0.f64 = ldexp(S0.f64, 128) + # endif + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(s0) + VCC = Reg(vcc) + # --- compiled pseudocode --- + VCC = Reg(0x0) + if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): + VCC = Reg(0x1); D0.f64 = float("nan") + elif exponent(S2.f64) - exponent(S1.f64) >= 768: + VCC = Reg(0x1) + if S0.f64 == S1.f64: + D0.f64 = ldexp(S0.f64, 128) + elif False: + pass # denorm check moved to end + elif ((1.0 / S1.f64 == DENORM.f64) and (S2.f64 / S1.f64 == DENORM.f64)): + VCC = Reg(0x1) + if S0.f64 == S1.f64: + D0.f64 = ldexp(S0.f64, 128) + elif 1.0 / S1.f64 == DENORM.f64: + D0.f64 = ldexp(S0.f64, -128) + elif S2.f64 / S1.f64 == DENORM.f64: + VCC = Reg(0x1) + elif exponent(S2.f64) <= 53: + D0.f64 = ldexp(S0.f64, 128) + if S1.f64 == DENORM.f64: + D0.f64 = float("nan") + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + D1 = Reg(0) + # --- compiled pseudocode --- + _full = ((S0.u32) * (S1.u32) + (S2.u64)) + D0.u64 = int(_full) & 0xffffffffffffffff + D1 = Reg((int(_full) >> 64) & 1) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + result['d1'] = D1._val & 1 + return result + +def _VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + D1 = Reg(0) + # --- compiled pseudocode --- + _full = ((S0.i32) * (S1.i32) + (S2.i64)) + D0.u64 = int(_full) & 0xffffffffffffffff + D1 = Reg((int(_full) >> 64) & 1) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + result['d1'] = D1._val & 1 + return result + +def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = 64'U(S0.u32) + 64'U(S1.u32); + # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg((S0.u32) + (S1.u32)) + VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S0.u32 - S1.u32; + # VCC.u64[laneId] = S1.u32 > S0.u32 ? 
1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S0.u32 - S1.u32) + VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S1.u32 - S0.u32; + # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; + # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. + # D0.u32 = tmp.u32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + tmp = Reg(0) + laneId = lane + # --- compiled pseudocode --- + tmp = Reg(S1.u32 - S0.u32) + VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) + D0.u32 = tmp.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['vcc_lane'] = (VCC._val >> lane) & 1 + return result + +VOP3SDOp_FUNCTIONS = { + VOP3SDOp.V_ADD_CO_CI_U32: _VOP3SDOp_V_ADD_CO_CI_U32, + VOP3SDOp.V_SUB_CO_CI_U32: _VOP3SDOp_V_SUB_CO_CI_U32, + VOP3SDOp.V_SUBREV_CO_CI_U32: _VOP3SDOp_V_SUBREV_CO_CI_U32, + VOP3SDOp.V_DIV_SCALE_F32: _VOP3SDOp_V_DIV_SCALE_F32, + VOP3SDOp.V_DIV_SCALE_F64: _VOP3SDOp_V_DIV_SCALE_F64, + VOP3SDOp.V_MAD_CO_U64_U32: _VOP3SDOp_V_MAD_CO_U64_U32, + VOP3SDOp.V_MAD_CO_I64_I32: _VOP3SDOp_V_MAD_CO_I64_I32, + VOP3SDOp.V_ADD_CO_U32: _VOP3SDOp_V_ADD_CO_U32, + VOP3SDOp.V_SUB_CO_U32: _VOP3SDOp_V_SUB_CO_U32, + VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, +} + +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 + tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; + # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 + tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 + tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); + # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) + tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); + # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) + tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); + # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); + # D0.b32 = tmp.b32 + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) + tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) + D0.b32 = tmp.b32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) + tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; + # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? 
S0[31 : 16].i16 : S1[31 : 16].i16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) + tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 + tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 + tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 + tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) + tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; + # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) + tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); + # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) + tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; + # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 + tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; + # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 + tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.f32; + # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); + # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.f32) + tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) + tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.u32; + # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); + # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); + # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8); + # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8) + tmp += u8_to_u32(S0[15 : 8].u8) * 
u8_to_u32(S1[15 : 8].u8) + tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8) + tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.u32; + # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); + # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); + # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4); + # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4); + # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4); + # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4); + # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4); + # tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4); + # D0.u32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.u32) + tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4) + tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4) + tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4) + tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4) + tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4) + tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4) + tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4) + tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) + D0.u32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); + # tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16) + tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); + # tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16) + tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16); + # tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16) + tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def 
_VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # declare tmp : 32'B; + # tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16); + # tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16); + # D0.b32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16) + tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16) + D0.b32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.f32; + # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].bf8); + # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].bf8); + # tmp += 32'F(S0[23 : 16].fp8) * 32'F(S1[23 : 16].bf8); + # tmp += 32'F(S0[31 : 24].fp8) * 32'F(S1[31 : 24].bf8); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.f32) + tmp += F(S0[7 : 0].fp8) * F(S1[7 : 0].bf8) + tmp += F(S0[15 : 8].fp8) * F(S1[15 : 8].bf8) + tmp += F(S0[23 : 16].fp8) * F(S1[23 : 16].bf8) + tmp += F(S0[31 : 24].fp8) * F(S1[31 : 24].bf8) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.f32; + # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].fp8); + # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].fp8); + # tmp += 32'F(S0[23 : 16].bf8) * 32'F(S1[23 : 16].fp8); + # tmp += 32'F(S0[31 : 24].bf8) * 32'F(S1[31 : 24].fp8); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.f32) + tmp += F(S0[7 : 0].bf8) * F(S1[7 : 0].fp8) + tmp += F(S0[15 : 8].bf8) * F(S1[15 : 8].fp8) + tmp += F(S0[23 : 16].bf8) * F(S1[23 : 16].fp8) + tmp += F(S0[31 : 24].bf8) * F(S1[31 : 24].fp8) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.f32; + # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].fp8); + # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].fp8); + # tmp += 32'F(S0[23 : 16].fp8) * 32'F(S1[23 : 16].fp8); + # tmp += 32'F(S0[31 : 24].fp8) * 32'F(S1[31 : 24].fp8); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.f32) + tmp += F(S0[7 : 0].fp8) * F(S1[7 : 0].fp8) + tmp += F(S0[15 : 8].fp8) * F(S1[15 : 8].fp8) + tmp += F(S0[23 : 16].fp8) * F(S1[23 : 16].fp8) + tmp += F(S0[31 : 24].fp8) * F(S1[31 : 24].fp8) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3POp_V_DOT4_F32_BF8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # tmp = S2.f32; + # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].bf8); + # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].bf8); + # tmp += 32'F(S0[23 : 16].bf8) * 32'F(S1[23 : 16].bf8); + # tmp += 32'F(S0[31 : 24].bf8) * 32'F(S1[31 : 24].bf8); + # D0.f32 = tmp + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + tmp = Reg(0) + # --- compiled pseudocode --- + tmp = Reg(S2.f32) + 
tmp += F(S0[7 : 0].bf8) * F(S1[7 : 0].bf8) + tmp += F(S0[15 : 8].bf8) * F(S1[15 : 8].bf8) + tmp += F(S0[23 : 16].bf8) * F(S1[23 : 16].bf8) + tmp += F(S0[31 : 24].bf8) * F(S1[31 : 24].bf8) + D0.f32 = tmp + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +VOP3POp_FUNCTIONS = { + VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, + VOP3POp.V_PK_MUL_LO_U16: _VOP3POp_V_PK_MUL_LO_U16, + VOP3POp.V_PK_ADD_I16: _VOP3POp_V_PK_ADD_I16, + VOP3POp.V_PK_SUB_I16: _VOP3POp_V_PK_SUB_I16, + VOP3POp.V_PK_LSHLREV_B16: _VOP3POp_V_PK_LSHLREV_B16, + VOP3POp.V_PK_LSHRREV_B16: _VOP3POp_V_PK_LSHRREV_B16, + VOP3POp.V_PK_ASHRREV_I16: _VOP3POp_V_PK_ASHRREV_I16, + VOP3POp.V_PK_MAX_I16: _VOP3POp_V_PK_MAX_I16, + VOP3POp.V_PK_MIN_I16: _VOP3POp_V_PK_MIN_I16, + VOP3POp.V_PK_MAD_U16: _VOP3POp_V_PK_MAD_U16, + VOP3POp.V_PK_ADD_U16: _VOP3POp_V_PK_ADD_U16, + VOP3POp.V_PK_SUB_U16: _VOP3POp_V_PK_SUB_U16, + VOP3POp.V_PK_MAX_U16: _VOP3POp_V_PK_MAX_U16, + VOP3POp.V_PK_MIN_U16: _VOP3POp_V_PK_MIN_U16, + VOP3POp.V_PK_FMA_F16: _VOP3POp_V_PK_FMA_F16, + VOP3POp.V_PK_ADD_F16: _VOP3POp_V_PK_ADD_F16, + VOP3POp.V_PK_MUL_F16: _VOP3POp_V_PK_MUL_F16, + VOP3POp.V_DOT2_F32_F16: _VOP3POp_V_DOT2_F32_F16, + VOP3POp.V_DOT4_U32_U8: _VOP3POp_V_DOT4_U32_U8, + VOP3POp.V_DOT8_U32_U4: _VOP3POp_V_DOT8_U32_U4, + VOP3POp.V_PK_MIN_NUM_F16: _VOP3POp_V_PK_MIN_NUM_F16, + VOP3POp.V_PK_MAX_NUM_F16: _VOP3POp_V_PK_MAX_NUM_F16, + VOP3POp.V_PK_MINIMUM_F16: _VOP3POp_V_PK_MINIMUM_F16, + VOP3POp.V_PK_MAXIMUM_F16: _VOP3POp_V_PK_MAXIMUM_F16, + VOP3POp.V_DOT4_F32_FP8_BF8: _VOP3POp_V_DOT4_F32_FP8_BF8, + VOP3POp.V_DOT4_F32_BF8_FP8: _VOP3POp_V_DOT4_F32_BF8_FP8, + VOP3POp.V_DOT4_F32_FP8_FP8: _VOP3POp_V_DOT4_F32_FP8_FP8, + VOP3POp.V_DOT4_F32_BF8_BF8: _VOP3POp_V_DOT4_F32_BF8_BF8, +} + +def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f16 < S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 < S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f16 == S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 == S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f16 <= S1.f16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 <= S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.f16 > S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 > S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f16 <> S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 != S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f16 >= S1.f16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f16 >= S1.f16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC + # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f16 >= S1.f16); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 >= S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f16 <> S1.f16); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 != S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = !(S0.f16 > S1.f16); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 > S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f16 <= S1.f16); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 <= S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f16 == S1.f16); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 == S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f16 < S1.f16); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f16 < S1.f16) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f32 < S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 < S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f32 == S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 == S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f32 <= S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 <= S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.f32 > S1.f32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 > S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f32 <> S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 != S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f32 >= S1.f32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f32 >= S1.f32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC + # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f32 >= S1.f32); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 >= S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f32 <> S1.f32); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 != S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = !(S0.f32 > S1.f32); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 > S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f32 <= S1.f32); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 <= S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f32 == S1.f32); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 == S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f32 < S1.f32); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f32 < S1.f32) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f64 < S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 < S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.f64 == S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 == S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f64 <= S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 <= S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.f64 > S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 > S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f64 <> S1.f64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 != S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.f64 >= S1.f64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.f64 >= S1.f64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC + # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f64 >= S1.f64); + # // With NAN inputs this is not the same operation as < + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 >= S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f64 <> S1.f64); + # // With NAN inputs this is not the same operation as == + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 != S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # VCC or a scalar register. + # D0.u64[laneId] = !(S0.f64 > S1.f64); + # // With NAN inputs this is not the same operation as <= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 > S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = !(S0.f64 <= S1.f64); + # // With NAN inputs this is not the same operation as > + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 <= S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f64 == S1.f64); + # // With NAN inputs this is not the same operation as != + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 == S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC + # D0.u64[laneId] = !(S0.f64 < S1.f64); + # // With NAN inputs this is not the same operation as >= + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = not (S0.f64 < S1.f64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i16 < S1.i16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 < S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i16 == S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 == S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i16 <= S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 <= S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.i16 > S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 > S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.i16 <> S1.i16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 != S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i16 >= S1.i16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i16 >= S1.i16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u16 < S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 < S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u16 == S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 == S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u16 <= S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 <= S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.u16 > S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 > S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.u16 <> S1.u16; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 != S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u16 >= S1.u16; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u16 >= S1.u16 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i32 < S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 < S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i32 == S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 == S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i32 <= S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 <= S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.i32 > S1.i32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 > S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.i32 <> S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 != S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i32 >= S1.i32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i32 >= S1.i32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u32 < S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 < S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u32 == S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 == S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u32 <= S1.u32; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 <= S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.u32 > S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 > S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.u32 <> S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 != S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u32 >= S1.u32; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u32 >= S1.u32 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i64 < S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 < S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.i64 == S1.i64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 == S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i64 <= S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 <= S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.i64 > S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 > S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.i64 <> S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 != S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.i64 >= S1.i64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.i64 >= S1.i64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u64 < S1.u64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 < S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a + # D0.u64[laneId] = S0.u64 == S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 == S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u64 <= S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 <= S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC + # D0.u64[laneId] = S0.u64 > S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 > S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC + # D0.u64[laneId] = S0.u64 <> S1.u64; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 != S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # D0.u64[laneId] = S0.u64 >= S1.u64; + # // D0 = VCC in VOPC encoding. 
+ S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + D0.u64[laneId] = S0.u64 >= S1.u64 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f16)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f16)) then + # result = S1.u32[1] + # elsif exponent(S0.f16) == 31 then + # // +-INF + # result = S1.u32[sign(S0.f16) ? 2 : 9] + # elsif exponent(S0.f16) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f16) ? 3 : 8] + # elsif 64'F(abs(S0.f16)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f16) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f16) ? 5 : 6] + # endif; + # D0.u64[laneId] = result; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f16)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f16)): + result = S1.u32[1] + elif exponent(S0.f16) == 31: + result = S1.u32[((2) if (sign(S0.f16)) else (9))] + elif exponent(S0.f16) > 0: + result = S1.u32[((3) if (sign(S0.f16)) else (8))] + elif F(abs(S0.f16)) > 0.0: + result = S1.u32[((4) if (sign(S0.f16)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f16)) else (6))] + D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f32)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f32)) then + # result = S1.u32[1] + # elsif exponent(S0.f32) == 255 then + # // +-INF + # result = S1.u32[sign(S0.f32) ? 2 : 9] + # elsif exponent(S0.f32) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f32) ? 3 : 8] + # elsif 64'F(abs(S0.f32)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f32) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f32) ? 
5 : 6] + # endif; + # D0.u64[laneId] = result; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f32)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f32)): + result = S1.u32[1] + elif exponent(S0.f32) == 255: + result = S1.u32[((2) if (sign(S0.f32)) else (9))] + elif exponent(S0.f32) > 0: + result = S1.u32[((3) if (sign(S0.f32)) else (8))] + elif F(abs(S0.f32)) > 0.0: + result = S1.u32[((4) if (sign(S0.f32)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f32)) else (6))] + D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(S0.f64) then + # result = S1.u32[0] + # elsif isQuietNAN(S0.f64) then + # result = S1.u32[1] + # elsif exponent(S0.f64) == 2047 then + # // +-INF + # result = S1.u32[sign(S0.f64) ? 2 : 9] + # elsif exponent(S0.f64) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f64) ? 3 : 8] + # elsif abs(S0.f64) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f64) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f64) ? 5 : 6] + # endif; + # D0.u64[laneId] = result; + # // D0 = VCC in VOPC encoding. + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + VCC = Reg(vcc) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(S0.f64): + result = S1.u32[0] + elif isQuietNAN(S0.f64): + result = S1.u32[1] + elif exponent(S0.f64) == 2047: + result = S1.u32[((2) if (sign(S0.f64)) else (9))] + elif exponent(S0.f64) > 0: + result = S1.u32[((3) if (sign(S0.f64)) else (8))] + elif abs(S0.f64) > 0.0: + result = S1.u32[((4) if (sign(S0.f64)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f64)) else (6))] + D0.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 + result['d0_64'] = True + return result + +def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f16 < S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f16 < S1.f16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.f16 == S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f16 == S1.f16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f16 <= S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f16 <= S1.f16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f16 > S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f16 > S1.f16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f16 <> S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f16 != S1.f16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f16 >= S1.f16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f16 >= S1.f16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); + # // With NAN inputs this is not the same operation as < + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f16 >= S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLG_F16(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); + # // With NAN inputs this is not the same operation as == + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f16 != S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f16 > S1.f16); + # // With NAN inputs this is not the same operation as <= + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f16 > S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); + # // With NAN inputs this is not the same operation as > + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f16 <= S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f16 == S1.f16); + # // With NAN inputs this is not the same operation as != + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f16 == S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f16 < S1.f16); + # // With NAN inputs this is not the same operation as >= + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f16 < S1.f16) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f32 < S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f32 < S1.f32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.f32 == S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f32 == S1.f32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f32 <= S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f32 <= S1.f32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f32 > S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f32 > S1.f32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f32 <> S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f32 != S1.f32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f32 >= S1.f32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f32 >= S1.f32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); + # // With NAN inputs this is not the same operation as < + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f32 >= S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLG_F32(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); + # // With NAN inputs this is not the same operation as == + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f32 != S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f32 > S1.f32); + # // With NAN inputs this is not the same operation as <= + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f32 > S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); + # // With NAN inputs this is not the same operation as > + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f32 <= S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f32 == S1.f32); + # // With NAN inputs this is not the same operation as != + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f32 == S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f32 < S1.f32); + # // With NAN inputs this is not the same operation as >= + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f32 < S1.f32) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f64 < S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f64 < S1.f64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.f64 == S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f64 == S1.f64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f64 <= S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f64 <= S1.f64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f64 > S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f64 > S1.f64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f64 <> S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f64 != S1.f64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.f64 >= S1.f64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.f64 >= S1.f64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); + # // With NAN inputs this is not the same operation as < + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f64 >= S1.f64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); + # // With NAN inputs this is not the same operation as == + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f64 != S1.f64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f64 > S1.f64); + # // With NAN inputs this is not the same operation as <= + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f64 > S1.f64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); + # // With NAN inputs this is not the same operation as > + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f64 <= S1.f64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f64 == S1.f64); + # // With NAN inputs this is not the same operation as != + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f64 == S1.f64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = !(S0.f64 < S1.f64); + # // With NAN inputs this is not the same operation as >= + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = not (S0.f64 < S1.f64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i16 < S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i16 < S1.i16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.i16 == S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i16 == S1.i16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i16 <= S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i16 <= S1.i16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i16 > S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i16 > S1.i16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i16 <> S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i16 != S1.i16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i16 >= S1.i16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i16 >= S1.i16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u16 < S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u16 < S1.u16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.u16 == S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u16 == S1.u16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u16 <= S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u16 <= S1.u16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u16 > S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u16 > S1.u16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u16 <> S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u16 != S1.u16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u16 >= S1.u16 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u16 >= S1.u16 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i32 < S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i32 < S1.i32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.i32 == S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i32 == S1.i32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i32 <= S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i32 <= S1.i32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i32 > S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i32 > S1.i32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i32 <> S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i32 != S1.i32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i32 >= S1.i32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i32 >= S1.i32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u32 < S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u32 < S1.u32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.u32 == S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u32 == S1.u32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u32 <= S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u32 <= S1.u32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u32 > S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u32 > S1.u32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u32 <> S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u32 != S1.u32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u32 >= S1.u32 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u32 >= S1.u32 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i64 < S1.i64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i64 < S1.i64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.i64 == S1.i64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i64 == S1.i64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i64 <= S1.i64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i64 <= S1.i64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i64 > S1.i64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i64 > S1.i64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i64 <> S1.i64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i64 != S1.i64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.i64 >= S1.i64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.i64 >= S1.i64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u64 < S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u64 < S1.u64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC + # EXEC.u64[laneId] = S0.u64 == S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u64 == S1.u64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u64 <= S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u64 <= S1.u64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u64 > S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u64 > S1.u64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u64 <> S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u64 != S1.u64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # EXEC.u64[laneId] = S0.u64 >= S1.u64 + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + EXEC.u64[laneId] = S0.u64 >= S1.u64 + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f16)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f16)) then + # result = S1.u32[1] + # elsif exponent(S0.f16) == 31 then + # // +-INF + # result = S1.u32[sign(S0.f16) ? 2 : 9] + # elsif exponent(S0.f16) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f16) ? 3 : 8] + # elsif 64'F(abs(S0.f16)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f16) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f16) ? 
5 : 6] + # endif; + # EXEC.u64[laneId] = result + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f16)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f16)): + result = S1.u32[1] + elif exponent(S0.f16) == 31: + result = S1.u32[((2) if (sign(S0.f16)) else (9))] + elif exponent(S0.f16) > 0: + result = S1.u32[((3) if (sign(S0.f16)) else (8))] + elif F(abs(S0.f16)) > 0.0: + result = S1.u32[((4) if (sign(S0.f16)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f16)) else (6))] + EXEC.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(64'F(S0.f32)) then + # result = S1.u32[0] + # elsif isQuietNAN(64'F(S0.f32)) then + # result = S1.u32[1] + # elsif exponent(S0.f32) == 255 then + # // +-INF + # result = S1.u32[sign(S0.f32) ? 2 : 9] + # elsif exponent(S0.f32) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f32) ? 3 : 8] + # elsif 64'F(abs(S0.f32)) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f32) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f32) ? 5 : 6] + # endif; + # EXEC.u64[laneId] = result + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(F(S0.f32)): + result = S1.u32[0] + elif isQuietNAN(F(S0.f32)): + result = S1.u32[1] + elif exponent(S0.f32) == 255: + result = S1.u32[((2) if (sign(S0.f32)) else (9))] + elif exponent(S0.f32) > 0: + result = S1.u32[((3) if (sign(S0.f32)) else (8))] + elif F(abs(S0.f32)) > 0.0: + result = S1.u32[((4) if (sign(S0.f32)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f32)) else (6))] + EXEC.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + # S1.u[0] value is a signaling NAN. + # S1.u[1] value is a quiet NAN. + # S1.u[2] value is negative infinity. + # S1.u[3] value is a negative normal value. + # S1.u[4] value is a negative denormal value. + # S1.u[5] value is negative zero. + # S1.u[6] value is positive zero. + # S1.u[7] value is a positive denormal value. + # S1.u[8] value is a positive normal value. + # S1.u[9] value is positive infinity. + # declare result : 1'U; + # if isSignalNAN(S0.f64) then + # result = S1.u32[0] + # elsif isQuietNAN(S0.f64) then + # result = S1.u32[1] + # elsif exponent(S0.f64) == 2047 then + # // +-INF + # result = S1.u32[sign(S0.f64) ? 2 : 9] + # elsif exponent(S0.f64) > 0 then + # // +-normal value + # result = S1.u32[sign(S0.f64) ? 3 : 8] + # elsif abs(S0.f64) > 0.0 then + # // +-denormal value + # result = S1.u32[sign(S0.f64) ? 4 : 7] + # else + # // +-0.0 + # result = S1.u32[sign(S0.f64) ? 
5 : 6] + # endif; + # EXEC.u64[laneId] = result + S0 = Reg(s0) + S1 = Reg(s1) + EXEC = Reg(exec_mask) + laneId = lane + # --- compiled pseudocode --- + if isSignalNAN(S0.f64): + result = S1.u32[0] + elif isQuietNAN(S0.f64): + result = S1.u32[1] + elif exponent(S0.f64) == 2047: + result = S1.u32[((2) if (sign(S0.f64)) else (9))] + elif exponent(S0.f64) > 0: + result = S1.u32[((3) if (sign(S0.f64)) else (8))] + elif abs(S0.f64) > 0.0: + result = S1.u32[((4) if (sign(S0.f64)) else (7))] + else: + result = S1.u32[((5) if (sign(S0.f64)) else (6))] + EXEC.u64[laneId] = result + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + result['exec_lane'] = (EXEC._val >> lane) & 1 + return result + +VOPCOp_FUNCTIONS = { + VOPCOp.V_CMP_LT_F16: _VOPCOp_V_CMP_LT_F16, + VOPCOp.V_CMP_EQ_F16: _VOPCOp_V_CMP_EQ_F16, + VOPCOp.V_CMP_LE_F16: _VOPCOp_V_CMP_LE_F16, + VOPCOp.V_CMP_GT_F16: _VOPCOp_V_CMP_GT_F16, + VOPCOp.V_CMP_LG_F16: _VOPCOp_V_CMP_LG_F16, + VOPCOp.V_CMP_GE_F16: _VOPCOp_V_CMP_GE_F16, + VOPCOp.V_CMP_O_F16: _VOPCOp_V_CMP_O_F16, + VOPCOp.V_CMP_U_F16: _VOPCOp_V_CMP_U_F16, + VOPCOp.V_CMP_NGE_F16: _VOPCOp_V_CMP_NGE_F16, + VOPCOp.V_CMP_NLG_F16: _VOPCOp_V_CMP_NLG_F16, + VOPCOp.V_CMP_NGT_F16: _VOPCOp_V_CMP_NGT_F16, + VOPCOp.V_CMP_NLE_F16: _VOPCOp_V_CMP_NLE_F16, + VOPCOp.V_CMP_NEQ_F16: _VOPCOp_V_CMP_NEQ_F16, + VOPCOp.V_CMP_NLT_F16: _VOPCOp_V_CMP_NLT_F16, + VOPCOp.V_CMP_LT_F32: _VOPCOp_V_CMP_LT_F32, + VOPCOp.V_CMP_EQ_F32: _VOPCOp_V_CMP_EQ_F32, + VOPCOp.V_CMP_LE_F32: _VOPCOp_V_CMP_LE_F32, + VOPCOp.V_CMP_GT_F32: _VOPCOp_V_CMP_GT_F32, + VOPCOp.V_CMP_LG_F32: _VOPCOp_V_CMP_LG_F32, + VOPCOp.V_CMP_GE_F32: _VOPCOp_V_CMP_GE_F32, + VOPCOp.V_CMP_O_F32: _VOPCOp_V_CMP_O_F32, + VOPCOp.V_CMP_U_F32: _VOPCOp_V_CMP_U_F32, + VOPCOp.V_CMP_NGE_F32: _VOPCOp_V_CMP_NGE_F32, + VOPCOp.V_CMP_NLG_F32: _VOPCOp_V_CMP_NLG_F32, + VOPCOp.V_CMP_NGT_F32: _VOPCOp_V_CMP_NGT_F32, + VOPCOp.V_CMP_NLE_F32: _VOPCOp_V_CMP_NLE_F32, + VOPCOp.V_CMP_NEQ_F32: _VOPCOp_V_CMP_NEQ_F32, + VOPCOp.V_CMP_NLT_F32: _VOPCOp_V_CMP_NLT_F32, + VOPCOp.V_CMP_LT_F64: _VOPCOp_V_CMP_LT_F64, + VOPCOp.V_CMP_EQ_F64: _VOPCOp_V_CMP_EQ_F64, + VOPCOp.V_CMP_LE_F64: _VOPCOp_V_CMP_LE_F64, + VOPCOp.V_CMP_GT_F64: _VOPCOp_V_CMP_GT_F64, + VOPCOp.V_CMP_LG_F64: _VOPCOp_V_CMP_LG_F64, + VOPCOp.V_CMP_GE_F64: _VOPCOp_V_CMP_GE_F64, + VOPCOp.V_CMP_O_F64: _VOPCOp_V_CMP_O_F64, + VOPCOp.V_CMP_U_F64: _VOPCOp_V_CMP_U_F64, + VOPCOp.V_CMP_NGE_F64: _VOPCOp_V_CMP_NGE_F64, + VOPCOp.V_CMP_NLG_F64: _VOPCOp_V_CMP_NLG_F64, + VOPCOp.V_CMP_NGT_F64: _VOPCOp_V_CMP_NGT_F64, + VOPCOp.V_CMP_NLE_F64: _VOPCOp_V_CMP_NLE_F64, + VOPCOp.V_CMP_NEQ_F64: _VOPCOp_V_CMP_NEQ_F64, + VOPCOp.V_CMP_NLT_F64: _VOPCOp_V_CMP_NLT_F64, + VOPCOp.V_CMP_LT_I16: _VOPCOp_V_CMP_LT_I16, + VOPCOp.V_CMP_EQ_I16: _VOPCOp_V_CMP_EQ_I16, + VOPCOp.V_CMP_LE_I16: _VOPCOp_V_CMP_LE_I16, + VOPCOp.V_CMP_GT_I16: _VOPCOp_V_CMP_GT_I16, + VOPCOp.V_CMP_NE_I16: _VOPCOp_V_CMP_NE_I16, + VOPCOp.V_CMP_GE_I16: _VOPCOp_V_CMP_GE_I16, + VOPCOp.V_CMP_LT_U16: _VOPCOp_V_CMP_LT_U16, + VOPCOp.V_CMP_EQ_U16: _VOPCOp_V_CMP_EQ_U16, + VOPCOp.V_CMP_LE_U16: _VOPCOp_V_CMP_LE_U16, + VOPCOp.V_CMP_GT_U16: _VOPCOp_V_CMP_GT_U16, + VOPCOp.V_CMP_NE_U16: _VOPCOp_V_CMP_NE_U16, + VOPCOp.V_CMP_GE_U16: _VOPCOp_V_CMP_GE_U16, + VOPCOp.V_CMP_LT_I32: _VOPCOp_V_CMP_LT_I32, + VOPCOp.V_CMP_EQ_I32: _VOPCOp_V_CMP_EQ_I32, + VOPCOp.V_CMP_LE_I32: _VOPCOp_V_CMP_LE_I32, + VOPCOp.V_CMP_GT_I32: _VOPCOp_V_CMP_GT_I32, + VOPCOp.V_CMP_NE_I32: _VOPCOp_V_CMP_NE_I32, + VOPCOp.V_CMP_GE_I32: _VOPCOp_V_CMP_GE_I32, + VOPCOp.V_CMP_LT_U32: _VOPCOp_V_CMP_LT_U32, + VOPCOp.V_CMP_EQ_U32: _VOPCOp_V_CMP_EQ_U32, + 
VOPCOp.V_CMP_LE_U32: _VOPCOp_V_CMP_LE_U32, + VOPCOp.V_CMP_GT_U32: _VOPCOp_V_CMP_GT_U32, + VOPCOp.V_CMP_NE_U32: _VOPCOp_V_CMP_NE_U32, + VOPCOp.V_CMP_GE_U32: _VOPCOp_V_CMP_GE_U32, + VOPCOp.V_CMP_LT_I64: _VOPCOp_V_CMP_LT_I64, + VOPCOp.V_CMP_EQ_I64: _VOPCOp_V_CMP_EQ_I64, + VOPCOp.V_CMP_LE_I64: _VOPCOp_V_CMP_LE_I64, + VOPCOp.V_CMP_GT_I64: _VOPCOp_V_CMP_GT_I64, + VOPCOp.V_CMP_NE_I64: _VOPCOp_V_CMP_NE_I64, + VOPCOp.V_CMP_GE_I64: _VOPCOp_V_CMP_GE_I64, + VOPCOp.V_CMP_LT_U64: _VOPCOp_V_CMP_LT_U64, + VOPCOp.V_CMP_EQ_U64: _VOPCOp_V_CMP_EQ_U64, + VOPCOp.V_CMP_LE_U64: _VOPCOp_V_CMP_LE_U64, + VOPCOp.V_CMP_GT_U64: _VOPCOp_V_CMP_GT_U64, + VOPCOp.V_CMP_NE_U64: _VOPCOp_V_CMP_NE_U64, + VOPCOp.V_CMP_GE_U64: _VOPCOp_V_CMP_GE_U64, + VOPCOp.V_CMP_CLASS_F16: _VOPCOp_V_CMP_CLASS_F16, + VOPCOp.V_CMP_CLASS_F32: _VOPCOp_V_CMP_CLASS_F32, + VOPCOp.V_CMP_CLASS_F64: _VOPCOp_V_CMP_CLASS_F64, + VOPCOp.V_CMPX_LT_F16: _VOPCOp_V_CMPX_LT_F16, + VOPCOp.V_CMPX_EQ_F16: _VOPCOp_V_CMPX_EQ_F16, + VOPCOp.V_CMPX_LE_F16: _VOPCOp_V_CMPX_LE_F16, + VOPCOp.V_CMPX_GT_F16: _VOPCOp_V_CMPX_GT_F16, + VOPCOp.V_CMPX_LG_F16: _VOPCOp_V_CMPX_LG_F16, + VOPCOp.V_CMPX_GE_F16: _VOPCOp_V_CMPX_GE_F16, + VOPCOp.V_CMPX_O_F16: _VOPCOp_V_CMPX_O_F16, + VOPCOp.V_CMPX_U_F16: _VOPCOp_V_CMPX_U_F16, + VOPCOp.V_CMPX_NGE_F16: _VOPCOp_V_CMPX_NGE_F16, + VOPCOp.V_CMPX_NLG_F16: _VOPCOp_V_CMPX_NLG_F16, + VOPCOp.V_CMPX_NGT_F16: _VOPCOp_V_CMPX_NGT_F16, + VOPCOp.V_CMPX_NLE_F16: _VOPCOp_V_CMPX_NLE_F16, + VOPCOp.V_CMPX_NEQ_F16: _VOPCOp_V_CMPX_NEQ_F16, + VOPCOp.V_CMPX_NLT_F16: _VOPCOp_V_CMPX_NLT_F16, + VOPCOp.V_CMPX_LT_F32: _VOPCOp_V_CMPX_LT_F32, + VOPCOp.V_CMPX_EQ_F32: _VOPCOp_V_CMPX_EQ_F32, + VOPCOp.V_CMPX_LE_F32: _VOPCOp_V_CMPX_LE_F32, + VOPCOp.V_CMPX_GT_F32: _VOPCOp_V_CMPX_GT_F32, + VOPCOp.V_CMPX_LG_F32: _VOPCOp_V_CMPX_LG_F32, + VOPCOp.V_CMPX_GE_F32: _VOPCOp_V_CMPX_GE_F32, + VOPCOp.V_CMPX_O_F32: _VOPCOp_V_CMPX_O_F32, + VOPCOp.V_CMPX_U_F32: _VOPCOp_V_CMPX_U_F32, + VOPCOp.V_CMPX_NGE_F32: _VOPCOp_V_CMPX_NGE_F32, + VOPCOp.V_CMPX_NLG_F32: _VOPCOp_V_CMPX_NLG_F32, + VOPCOp.V_CMPX_NGT_F32: _VOPCOp_V_CMPX_NGT_F32, + VOPCOp.V_CMPX_NLE_F32: _VOPCOp_V_CMPX_NLE_F32, + VOPCOp.V_CMPX_NEQ_F32: _VOPCOp_V_CMPX_NEQ_F32, + VOPCOp.V_CMPX_NLT_F32: _VOPCOp_V_CMPX_NLT_F32, + VOPCOp.V_CMPX_LT_F64: _VOPCOp_V_CMPX_LT_F64, + VOPCOp.V_CMPX_EQ_F64: _VOPCOp_V_CMPX_EQ_F64, + VOPCOp.V_CMPX_LE_F64: _VOPCOp_V_CMPX_LE_F64, + VOPCOp.V_CMPX_GT_F64: _VOPCOp_V_CMPX_GT_F64, + VOPCOp.V_CMPX_LG_F64: _VOPCOp_V_CMPX_LG_F64, + VOPCOp.V_CMPX_GE_F64: _VOPCOp_V_CMPX_GE_F64, + VOPCOp.V_CMPX_O_F64: _VOPCOp_V_CMPX_O_F64, + VOPCOp.V_CMPX_U_F64: _VOPCOp_V_CMPX_U_F64, + VOPCOp.V_CMPX_NGE_F64: _VOPCOp_V_CMPX_NGE_F64, + VOPCOp.V_CMPX_NLG_F64: _VOPCOp_V_CMPX_NLG_F64, + VOPCOp.V_CMPX_NGT_F64: _VOPCOp_V_CMPX_NGT_F64, + VOPCOp.V_CMPX_NLE_F64: _VOPCOp_V_CMPX_NLE_F64, + VOPCOp.V_CMPX_NEQ_F64: _VOPCOp_V_CMPX_NEQ_F64, + VOPCOp.V_CMPX_NLT_F64: _VOPCOp_V_CMPX_NLT_F64, + VOPCOp.V_CMPX_LT_I16: _VOPCOp_V_CMPX_LT_I16, + VOPCOp.V_CMPX_EQ_I16: _VOPCOp_V_CMPX_EQ_I16, + VOPCOp.V_CMPX_LE_I16: _VOPCOp_V_CMPX_LE_I16, + VOPCOp.V_CMPX_GT_I16: _VOPCOp_V_CMPX_GT_I16, + VOPCOp.V_CMPX_NE_I16: _VOPCOp_V_CMPX_NE_I16, + VOPCOp.V_CMPX_GE_I16: _VOPCOp_V_CMPX_GE_I16, + VOPCOp.V_CMPX_LT_U16: _VOPCOp_V_CMPX_LT_U16, + VOPCOp.V_CMPX_EQ_U16: _VOPCOp_V_CMPX_EQ_U16, + VOPCOp.V_CMPX_LE_U16: _VOPCOp_V_CMPX_LE_U16, + VOPCOp.V_CMPX_GT_U16: _VOPCOp_V_CMPX_GT_U16, + VOPCOp.V_CMPX_NE_U16: _VOPCOp_V_CMPX_NE_U16, + VOPCOp.V_CMPX_GE_U16: _VOPCOp_V_CMPX_GE_U16, + VOPCOp.V_CMPX_LT_I32: _VOPCOp_V_CMPX_LT_I32, + VOPCOp.V_CMPX_EQ_I32: _VOPCOp_V_CMPX_EQ_I32, + VOPCOp.V_CMPX_LE_I32: 
_VOPCOp_V_CMPX_LE_I32, + VOPCOp.V_CMPX_GT_I32: _VOPCOp_V_CMPX_GT_I32, + VOPCOp.V_CMPX_NE_I32: _VOPCOp_V_CMPX_NE_I32, + VOPCOp.V_CMPX_GE_I32: _VOPCOp_V_CMPX_GE_I32, + VOPCOp.V_CMPX_LT_U32: _VOPCOp_V_CMPX_LT_U32, + VOPCOp.V_CMPX_EQ_U32: _VOPCOp_V_CMPX_EQ_U32, + VOPCOp.V_CMPX_LE_U32: _VOPCOp_V_CMPX_LE_U32, + VOPCOp.V_CMPX_GT_U32: _VOPCOp_V_CMPX_GT_U32, + VOPCOp.V_CMPX_NE_U32: _VOPCOp_V_CMPX_NE_U32, + VOPCOp.V_CMPX_GE_U32: _VOPCOp_V_CMPX_GE_U32, + VOPCOp.V_CMPX_LT_I64: _VOPCOp_V_CMPX_LT_I64, + VOPCOp.V_CMPX_EQ_I64: _VOPCOp_V_CMPX_EQ_I64, + VOPCOp.V_CMPX_LE_I64: _VOPCOp_V_CMPX_LE_I64, + VOPCOp.V_CMPX_GT_I64: _VOPCOp_V_CMPX_GT_I64, + VOPCOp.V_CMPX_NE_I64: _VOPCOp_V_CMPX_NE_I64, + VOPCOp.V_CMPX_GE_I64: _VOPCOp_V_CMPX_GE_I64, + VOPCOp.V_CMPX_LT_U64: _VOPCOp_V_CMPX_LT_U64, + VOPCOp.V_CMPX_EQ_U64: _VOPCOp_V_CMPX_EQ_U64, + VOPCOp.V_CMPX_LE_U64: _VOPCOp_V_CMPX_LE_U64, + VOPCOp.V_CMPX_GT_U64: _VOPCOp_V_CMPX_GT_U64, + VOPCOp.V_CMPX_NE_U64: _VOPCOp_V_CMPX_NE_U64, + VOPCOp.V_CMPX_GE_U64: _VOPCOp_V_CMPX_GE_U64, + VOPCOp.V_CMPX_CLASS_F16: _VOPCOp_V_CMPX_CLASS_F16, + VOPCOp.V_CMPX_CLASS_F32: _VOPCOp_V_CMPX_CLASS_F32, + VOPCOp.V_CMPX_CLASS_F64: _VOPCOp_V_CMPX_CLASS_F64, +} + + +# V_WRITELANE_B32: Write scalar to specific lane's VGPR (not in PDF pseudocode) +def _VOP3Op_V_WRITELANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): + wr_lane = s1 & 0x1f # lane select (5 bits for wave32) + return {'d0': d0, 'scc': scc, 'vgpr_write': (wr_lane, vdst_idx, s0 & 0xffffffff)} +VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = _VOP3Op_V_WRITELANE_B32 + +COMPILED_FUNCTIONS = { + SOP1Op: SOP1Op_FUNCTIONS, + SOP2Op: SOP2Op_FUNCTIONS, + SOPCOp: SOPCOp_FUNCTIONS, + SOPKOp: SOPKOp_FUNCTIONS, + SOPPOp: SOPPOp_FUNCTIONS, + VOP1Op: VOP1Op_FUNCTIONS, + VOP2Op: VOP2Op_FUNCTIONS, + VOP3Op: VOP3Op_FUNCTIONS, + VOP3SDOp: VOP3SDOp_FUNCTIONS, + VOP3POp: VOP3POp_FUNCTIONS, + VOPCOp: VOPCOp_FUNCTIONS, +} + +def get_compiled_functions(): return COMPILED_FUNCTIONS \ No newline at end of file diff --git a/extra/assembly/amd/lib.py b/extra/assembly/amd/dsl.py similarity index 84% rename from extra/assembly/amd/lib.py rename to extra/assembly/amd/dsl.py index 20e066b7de..2988073f3e 100644 --- a/extra/assembly/amd/lib.py +++ b/extra/assembly/amd/dsl.py @@ -289,8 +289,10 @@ class Inst64(Inst): pass # ═══════════════════════════════════════════════════════════════════════════════ PDF_URLS = { - "rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content", - "cdna4": "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf", + "rdna3": "https://docs.amd.com/api/khub/documents/UVVZM22UN7tMUeiW_4ShTQ/content", # RDNA3.5 + "rdna4": "https://docs.amd.com/api/khub/documents/uQpkEvk3pv~kfAb2x~j4uw/content", + "cdna": ["https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-mi300-cdna3-instruction-set-architecture.pdf", + "https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/instruction-set-architectures/amd-instinct-cdna4-instruction-set-architecture.pdf"], } FIELD_TYPES = {'SSRC0': 'SSrc', 'SSRC1': 'SSrc', 'SOFFSET': 'SSrc', 'SADDR': 'SSrc', 'SRC0': 'Src', 'SRC1': 'Src', 'SRC2': 'Src', 'SDST': 'SGPRField', 'SBASE': 'SGPRField', 'SDATA': 'SGPRField', 'SRSRC': 'SGPRField', 'VDST': 'VGPRField', 'VSRC1': 'VGPRField', 'VDATA': 'VGPRField', @@ -338,28 +340,34 @@ def _parse_fields_table(table: list, fmt: str, enums: set[str]) -> 
list[tuple]: fields.append((name, hi, lo, enc_val, ftype)) return fields -def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: - """Generate instruction definitions from AMD ISA PDF. Returns dict with formats for testing.""" +def _parse_single_pdf(url: str) -> dict: + """Parse a single PDF and return raw data (formats, enums, src_enum, doc_name, is_cdna).""" import re, pdfplumber from tinygrad.helpers import fetch - pdf = pdfplumber.open(fetch(PDF_URLS[arch])) + pdf = pdfplumber.open(fetch(url)) # Auto-detect document type from first page first_page_text = pdf.pages[0].extract_text() or '' is_cdna4 = 'CDNA4' in first_page_text or 'CDNA 4' in first_page_text - doc_name = "CDNA4" if is_cdna4 else "RDNA3.5" + is_cdna3 = 'CDNA3' in first_page_text or 'CDNA 3' in first_page_text or 'MI300' in first_page_text + is_cdna = is_cdna3 or is_cdna4 + is_rdna4 = 'RDNA4' in first_page_text or 'RDNA 4' in first_page_text + is_rdna35 = 'RDNA3.5' in first_page_text or 'RDNA 3.5' in first_page_text # Check 3.5 before 3 + is_rdna3 = not is_rdna35 and ('RDNA3' in first_page_text or 'RDNA 3' in first_page_text) + doc_name = "CDNA4" if is_cdna4 else "CDNA3" if is_cdna3 else "RDNA4" if is_rdna4 else "RDNA3.5" if is_rdna35 else "RDNA3" if is_rdna3 else "Unknown" - # Find the "Microcode Formats" section by searching the PDF - # Look for "Chapter X. Microcode Formats" (RDNA3) or first format subsection header (CDNA4) + # Find the "Microcode Formats" section - search for SOP2 format definition microcode_start = None - for i, page in enumerate(pdf.pages): - text = page.extract_text() or '' - if re.search(r'Chapter \d+\.\s+Microcode Formats', text) or \ - (i > 100 and re.search(r'^\d+\.\d+\.\d+\.\s+SOP2\s*\n', text, re.M)): + total_pages = len(pdf.pages) + # Search from likely locations (formats are typically 20-95% through the document - RDNA3 has them at ~25%) + for i in range(int(total_pages * 0.2), total_pages): + text = pdf.pages[i].extract_text() or '' + # Look for "X.Y.Z. SOP2" section header or "Chapter X. Microcode Formats" + if re.search(r'\d+\.\d+\.\d+\.\s+SOP2\b', text) or re.search(r'Chapter \d+\.\s+Microcode Formats', text): microcode_start = i break - if microcode_start is None: microcode_start = 150 # fallback for RDNA3.5 + if microcode_start is None: microcode_start = int(total_pages * 0.9) pages = pdf.pages[microcode_start:microcode_start + 50] page_texts = [p.extract_text() or '' for p in pages] @@ -392,16 +400,13 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: return (pos := text.find('Field Name')) != -1 and bool(re.search(r'\d+\.\d+\.\d+\.\s+\w+\s*\n', text[:pos])) # find format headers with their page indices - format_headers = [] # (fmt_name, page_idx, header_pos) + format_headers = [] for i, text in enumerate(page_texts): - # Match "X.Y.Z. 
FORMAT_NAME" followed by Description (RDNA3) or newline (CDNA4) for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n?Description', text): format_headers.append((m.group(1), i, m.start())) for m in re.finditer(r'\d+\.\d+\.\d+\.\s+(\w+)\s*\n', text): fmt_name = m.group(1) - # For CDNA4: accept uppercase format names (SOP2, VOP1, etc) directly - if is_cdna4 and fmt_name.isupper() and len(fmt_name) >= 2: + if is_cdna and fmt_name.isupper() and len(fmt_name) >= 2: format_headers.append((fmt_name, i, m.start())) - # For RDNA3: check for Description on next page elif m.start() > len(text) - 200 and 'Description' not in text[m.end():] and i + 1 < len(page_texts): next_text = page_texts[i + 1].lstrip() if next_text.startswith('Description') or (next_text.startswith('"RDNA') and 'Description' in next_text[:200]): @@ -414,7 +419,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: text, tables = page_texts[page_idx], page_tables[page_idx] field_pos = text.find('Field Name', header_pos) - # find fields table with ENCODING (same page or up to 2 pages ahead) fields = None for offset in range(3): if page_idx + offset >= len(pages): break @@ -425,7 +429,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: break if fields: break - # for modifier formats (no ENCODING), accept first fields table on same page if not fields and field_pos > header_pos: for t in tables: if is_fields_table(t) and (f := _parse_fields_table(t, fmt_name, enum_names)): @@ -435,7 +438,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: if not fields: continue field_names = {f[0] for f in fields} - # check next pages for continuation fields (tables without ENCODING) for pg_offset in range(1, 3): if page_idx + pg_offset >= len(pages) or has_header_before_fields(page_texts[page_idx + pg_offset]): break for t in page_tables[page_idx + pg_offset]: @@ -447,19 +449,70 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: break formats[fmt_name] = fields - # fix known PDF errors (verified against LLVM test vectors) - # SMEM: PDF says DLC=bit14, GLC=bit16 but actual encoding is DLC=bit13, GLC=bit14 + # fix known PDF errors if 'SMEM' in formats: formats['SMEM'] = [(n, 13 if n == 'DLC' else 14 if n == 'GLC' else h, 13 if n == 'DLC' else 14 if n == 'GLC' else l, e, t) for n, h, l, e, t in formats['SMEM']] + return {"formats": formats, "enums": enums, "src_enum": src_enum, "doc_name": doc_name, "is_cdna": is_cdna} + +def _merge_results(results: list[dict]) -> dict: + """Merge multiple PDF parse results into a superset. 
Asserts if any conflicts.""" + merged = {"formats": {}, "enums": {}, "src_enum": dict(SRC_EXTRAS), "doc_names": [], "is_cdna": False} + for r in results: + merged["doc_names"].append(r["doc_name"]) + merged["is_cdna"] = merged["is_cdna"] or r["is_cdna"] + # Merge src_enum (union, assert no conflicts) + for val, name in r["src_enum"].items(): + if val in merged["src_enum"]: + assert merged["src_enum"][val] == name, f"SrcEnum conflict: {val} = {merged['src_enum'][val]} vs {name}" + else: + merged["src_enum"][val] = name + # Merge enums (union of ops per enum, assert no conflicts) + for enum_name, ops in r["enums"].items(): + if enum_name not in merged["enums"]: merged["enums"][enum_name] = {} + for val, name in ops.items(): + if val in merged["enums"][enum_name]: + assert merged["enums"][enum_name][val] == name, f"{enum_name} conflict: {val} = {merged['enums'][enum_name][val]} vs {name}" + else: + merged["enums"][enum_name][val] = name + # Merge formats (union of fields, assert no bit position conflicts for same field name) + for fmt_name, fields in r["formats"].items(): + if fmt_name not in merged["formats"]: + merged["formats"][fmt_name] = list(fields) + else: + existing = {f[0]: (f[1], f[2]) for f in merged["formats"][fmt_name]} # name -> (hi, lo) + for f in fields: + name, hi, lo = f[0], f[1], f[2] + if name in existing: + assert existing[name] == (hi, lo), f"Format {fmt_name} field {name} conflict: bits {existing[name]} vs ({hi}, {lo})" + else: + merged["formats"][fmt_name].append(f) + return merged + +def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: + """Generate instruction definitions from AMD ISA PDF(s). Returns dict with formats for testing.""" + urls = PDF_URLS[arch] + if isinstance(urls, str): urls = [urls] + + # Parse all PDFs and merge + results = [_parse_single_pdf(url) for url in urls] + if len(results) == 1: + merged = results[0] + doc_name = merged["doc_name"] + else: + merged = _merge_results(results) + doc_name = "+".join(merged["doc_names"]) + + formats, enums, src_enum = merged["formats"], merged["enums"], merged["src_enum"] + # generate output def enum_lines(name, items): return [f"class {name}(IntEnum):"] + [f" {n} = {v}" for v, n in sorted(items.items())] + [""] def field_key(f): return order.index(f[0].lower()) if f[0].lower() in order else 1000 - lines = [f"# autogenerated from AMD {doc_name} ISA PDF by lib.py - do not edit", "from enum import IntEnum", + lines = [f"# autogenerated from AMD {doc_name} ISA PDF by dsl.py - do not edit", "from enum import IntEnum", "from typing import Annotated", - "from extra.assembly.amd.lib import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField", + "from extra.assembly.amd.dsl import bits, BitField, Inst32, Inst64, SGPR, VGPR, TTMP as TTMP, s as s, v as v, ttmp as ttmp, SSrc, Src, SImm, Imm, VDSTYEnc, SGPRField, VGPRField", "import functools", ""] lines += enum_lines("SrcEnum", src_enum) + sum([enum_lines(n, ops) for n, ops in sorted(enums.items())], []) # Format-specific field defaults (verified against LLVM test vectors) @@ -475,7 +528,6 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: if defaults := format_defaults.get(fmt_name): lines.append(f" _defaults = {defaults}") for name, hi, lo, _, ftype in sorted([f for f in fields if f[0] != 'ENCODING'], key=field_key): - # Wrap IntEnum types (ending in Op) with Annotated[BitField, ...] 
for correct typing if ftype and ftype.endswith('Op'): ann = f":Annotated[BitField, {ftype}]" else: @@ -489,23 +541,18 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=2"}.get(fmt, "") tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt, f"{fmt}, {cls_name}") if fmt in formats or fmt in ("GLOBAL", "SCRATCH"): - # VOP1/VOP2/VOPC get _e32 suffix, VOP3 promoted ops (< 512) get _e64 suffix if fmt in ("VOP1", "VOP2", "VOPC"): suffix = "_e32" elif fmt == "VOP3" and op_val < 512: suffix = "_e64" else: suffix = "" - # FMAMK/FMAAK have a literal constant K that must be passed via literal= kwarg - # FMAMK: D = S0.f * K + S1.f (K is 3rd operand in assembly syntax) - # FMAAK: D = S0.f * S1.f + K (K is 4th operand in assembly syntax) if name in ('V_FMAMK_F32', 'V_FMAMK_F16'): lines.append(f"def {name.lower()}{suffix}(vdst, src0, K, vsrc1): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)") elif name in ('V_FMAAK_F32', 'V_FMAAK_F16'): lines.append(f"def {name.lower()}{suffix}(vdst, src0, vsrc1, K): return {fmt}({cls_name}.{name}, vdst, src0, vsrc1, literal=K)") else: lines.append(f"{name.lower()}{suffix} = functools.partial({tgt}.{name}{seg})") - # export SrcEnum values, but skip DPP8/DPP16 which conflict with class names skip_exports = {'DPP8', 'DPP16'} src_names = {name for _, name in src_enum.items()} lines += [""] + [f"{name} = SrcEnum.{name}" for _, name in sorted(src_enum.items()) if name not in skip_exports] @@ -519,7 +566,12 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict: if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Generate instruction definitions from AMD ISA PDF") - parser.add_argument("--arch", choices=list(PDF_URLS.keys()), default="rdna3", help="Target architecture (default: rdna3)") + parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3", help="Target architecture (default: rdna3)") args = parser.parse_args() - result = generate(f"extra/assembly/amd/autogen/{args.arch}/__init__.py", arch=args.arch) - print(f"generated SrcEnum ({len(result['src_enum'])}) + {len(result['enums'])} opcode enums + {len(result['formats'])} format classes") + if args.arch == "all": + for arch in PDF_URLS.keys(): + result = generate(f"extra/assembly/amd/autogen/{arch}/__init__.py", arch=arch) + print(f"{arch}: generated SrcEnum ({len(result['src_enum'])}) + {len(result['enums'])} opcode enums + {len(result['formats'])} format classes") + else: + result = generate(f"extra/assembly/amd/autogen/{args.arch}/__init__.py", arch=args.arch) + print(f"generated SrcEnum ({len(result['src_enum'])}) + {len(result['enums'])} opcode enums + {len(result['formats'])} format classes") diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index 521b602e57..2a7c908ca5 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -2,7 +2,7 @@ # mypy: ignore-errors from __future__ import annotations import ctypes, os -from extra.assembly.amd.lib import Inst, RawImm +from extra.assembly.amd.dsl import Inst, RawImm from extra.assembly.amd.pcode import _f32, _i32, _sext, _f16, _i16, _f64, _i64 from extra.assembly.amd.autogen.rdna3.gen_pcode import get_compiled_functions from extra.assembly.amd.autogen.rdna3 import ( diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py index 1d6dc0e60c..cb26620323 100644 --- a/extra/assembly/amd/pcode.py +++ b/extra/assembly/amd/pcode.py @@ -702,7 
+702,7 @@ class ExecContext: # PDF EXTRACTION AND CODE GENERATION # ═══════════════════════════════════════════════════════════════════════════════ -from extra.assembly.amd.lib import PDF_URLS +from extra.assembly.amd.dsl import PDF_URLS INST_PATTERN = re.compile(r'^([SV]_[A-Z0-9_]+)\s+(\d+)\s*$', re.M) # Patterns that can't be handled by the DSL (require special handling in emu.py) @@ -736,38 +736,52 @@ def extract_pseudocode(text: str) -> str | None: if is_code: result.append(s) return '\n'.join(result) if result else None -def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict: - """Parse pseudocode from PDF for all ops. Returns {enum_cls: {op: pseudocode}}.""" +def _get_op_enums(arch: str) -> list: + """Dynamically load op enums from the arch-specific autogen module.""" + import importlib + autogen = importlib.import_module(f"extra.assembly.amd.autogen.{arch}") + # Deterministic order: common enums first, then arch-specific + enums = [] + for name in ['SOP1Op', 'SOP2Op', 'SOPCOp', 'SOPKOp', 'SOPPOp', 'VOP1Op', 'VOP2Op', 'VOP3Op', 'VOP3SDOp', 'VOP3POp', 'VOPCOp', 'VOP3AOp', 'VOP3BOp']: + if hasattr(autogen, name): enums.append(getattr(autogen, name)) + return enums + +def _parse_pseudocode_from_single_pdf(url: str, defined_ops: dict, OP_ENUMS: list) -> dict: + """Parse pseudocode from a single PDF.""" import pdfplumber from tinygrad.helpers import fetch - from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp - OP_ENUMS = [SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp] - defined_ops = {} - for enum_cls in OP_ENUMS: - for op in enum_cls: - if op.name.startswith(('S_', 'V_')): defined_ops[(op.name, op.value)] = (enum_cls, op) + pdf = pdfplumber.open(fetch(url)) + total_pages = len(pdf.pages) - pdf = pdfplumber.open(fetch(PDF_URLS[arch])) + page_cache = {} + def get_page_text(i): + if i not in page_cache: page_cache[i] = pdf.pages[i].extract_text() or '' + return page_cache[i] - # Find the "Instructions" chapter by looking for "Chapter X. 
Instructions" + # Find the "Instructions" chapter - typically 15-40% through the document instr_start = None - for i, page in enumerate(pdf.pages): - text = page.extract_text() or '' - if re.search(r'Chapter \d+\.\s+Instructions', text): - instr_start = i - break - if instr_start is None: instr_start = len(pdf.pages) // 3 # fallback + search_starts = [int(total_pages * 0.2), int(total_pages * 0.1), 0] + for start in search_starts: + for i in range(start, min(start + 100, total_pages)): + if re.search(r'Chapter \d+\.\s+Instructions', get_page_text(i)): + instr_start = i + break + if instr_start: break + if instr_start is None: instr_start = total_pages // 3 # fallback - # Find end - stop at "Microcode Formats" chapter - instr_end = len(pdf.pages) - for i, page in enumerate(pdf.pages[instr_start:], instr_start): - text = page.extract_text() or '' - if re.search(r'Chapter \d+\.\s+Microcode Formats', text): - instr_end = i - break + # Find end - stop at "Microcode Formats" chapter (typically 60-70% through) + instr_end = total_pages + search_starts = [int(total_pages * 0.6), int(total_pages * 0.5), instr_start] + for start in search_starts: + for i in range(start, min(start + 100, total_pages)): + if re.search(r'Chapter \d+\.\s+Microcode Formats', get_page_text(i)): + instr_end = i + break + if instr_end < total_pages: break - all_text = '\n'.join(pdf.pages[i].extract_text() or '' for i in range(instr_start, instr_end)) + # Extract remaining pages (some already cached from chapter search) + all_text = '\n'.join(get_page_text(i) for i in range(instr_start, instr_end)) matches = list(INST_PATTERN.finditer(all_text)) instructions: dict = {cls: {} for cls in OP_ENUMS} @@ -783,12 +797,39 @@ def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict: return instructions +def parse_pseudocode_from_pdf(arch: str = "rdna3") -> dict: + """Parse pseudocode from PDF(s) for all ops. 
Returns {enum_cls: {op: pseudocode}}.""" + OP_ENUMS = _get_op_enums(arch) + defined_ops = {} + for enum_cls in OP_ENUMS: + for op in enum_cls: + if op.name.startswith(('S_', 'V_')): defined_ops[(op.name, op.value)] = (enum_cls, op) + + urls = PDF_URLS[arch] + if isinstance(urls, str): urls = [urls] + + # Parse all PDFs and merge (union of pseudocode) + # Reverse order so newer PDFs (RDNA3.5, CDNA4) take priority + instructions: dict = {cls: {} for cls in OP_ENUMS} + for url in reversed(urls): + result = _parse_pseudocode_from_single_pdf(url, defined_ops, OP_ENUMS) + for cls, ops in result.items(): + for op, pseudocode in ops.items(): + if op in instructions[cls]: + if instructions[cls][op] != pseudocode: + print(f" Ignoring {op.name} from older PDF:") + print(f" new: {instructions[cls][op]!r}") + print(f" old: {pseudocode!r}") + else: + instructions[cls][op] = pseudocode + + return instructions + def generate_gen_pcode(output_path: str = "extra/assembly/amd/autogen/rdna3/gen_pcode.py", arch: str = "rdna3"): """Generate gen_pcode.py - compiled pseudocode functions for the emulator.""" from pathlib import Path - from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp - OP_ENUMS = [SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp] + OP_ENUMS = _get_op_enums(arch) print("Parsing pseudocode from PDF...") by_cls = parse_pseudocode_from_pdf(arch) @@ -803,11 +844,13 @@ def generate_gen_pcode(output_path: str = "extra/assembly/amd/autogen/rdna3/gen_ print(f"Total: {total_found}/{total_ops} ({100*total_found//total_ops}%)") print("\nCompiling to pseudocode functions...") + # Build dynamic import line based on available enums + enum_names = [e.__name__ for e in OP_ENUMS] lines = [f'''# autogenerated by pcode.py - do not edit # to regenerate: python -m extra.assembly.amd.pcode --arch {arch} # ruff: noqa: E501,F405,F403 # mypy: ignore-errors -from extra.assembly.amd.autogen.{arch} import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp +from extra.assembly.amd.autogen.{arch} import {", ".join(enum_names)} from extra.assembly.amd.pcode import * '''] @@ -965,7 +1008,9 @@ from extra.assembly.amd.pcode import * lines.append('') # Add manually implemented V_WRITELANE_B32 (not in PDF pseudocode, requires special vgpr_write handling) - lines.append(''' + # Only add for architectures that have VOP3Op (RDNA) not VOP3AOp/VOP3BOp (CDNA) + if 'VOP3Op' in enum_names: + lines.append(''' # V_WRITELANE_B32: Write scalar to specific lane's VGPR (not in PDF pseudocode) def _VOP3Op_V_WRITELANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): wr_lane = s1 & 0x1f # lane select (5 bits for wave32) @@ -987,6 +1032,10 @@ VOP3Op_FUNCTIONS[VOP3Op.V_WRITELANE_B32] = _VOP3Op_V_WRITELANE_B32 if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Generate pseudocode functions from AMD ISA PDF") - parser.add_argument("--arch", choices=list(PDF_URLS.keys()), default="rdna3", help="Target architecture (default: rdna3)") + parser.add_argument("--arch", choices=list(PDF_URLS.keys()) + ["all"], default="rdna3", help="Target architecture (default: rdna3)") args = parser.parse_args() - generate_gen_pcode(output_path=f"extra/assembly/amd/autogen/{args.arch}/gen_pcode.py", arch=args.arch) + if args.arch == "all": + for arch in PDF_URLS.keys(): + 
generate_gen_pcode(output_path=f"extra/assembly/amd/autogen/{arch}/gen_pcode.py", arch=arch) + else: + generate_gen_pcode(output_path=f"extra/assembly/amd/autogen/{args.arch}/gen_pcode.py", arch=args.arch) diff --git a/extra/assembly/amd/test/external_test_usability.py b/extra/assembly/amd/test/external_test_usability.py index 4d3c4813e4..df29891a67 100644 --- a/extra/assembly/amd/test/external_test_usability.py +++ b/extra/assembly/amd/test/external_test_usability.py @@ -4,7 +4,7 @@ import unittest from extra.assembly.amd.autogen.rdna3 import * -from extra.assembly.amd.lib import Inst, RawImm, SGPR, VGPR +from extra.assembly.amd.dsl import Inst, RawImm, SGPR, VGPR class TestRegisterSliceSyntax(unittest.TestCase): """ diff --git a/extra/assembly/amd/test/test_emu.py b/extra/assembly/amd/test/test_emu.py index 605ef88eef..f1edc38ae2 100644 --- a/extra/assembly/amd/test/test_emu.py +++ b/extra/assembly/amd/test/test_emu.py @@ -7,7 +7,7 @@ Set USE_HW=1 to run on both emulator and real hardware, comparing results. import ctypes, unittest, os, struct from extra.assembly.amd.autogen.rdna3 import * -from extra.assembly.amd.lib import RawImm +from extra.assembly.amd.dsl import RawImm from extra.assembly.amd.emu import WaveState, run_asm, set_valid_mem_ranges from extra.assembly.amd.pcode import _i32, _f32 diff --git a/extra/assembly/amd/test/test_formats.py b/extra/assembly/amd/test/test_formats.py index bae1addfc3..dc4504ddb9 100644 --- a/extra/assembly/amd/test/test_formats.py +++ b/extra/assembly/amd/test/test_formats.py @@ -2,7 +2,7 @@ """Test MUBUF, MTBUF, MIMG, EXP, DS formats against LLVM.""" import unittest from extra.assembly.amd.autogen.rdna3 import * -from extra.assembly.amd.lib import encode_src +from extra.assembly.amd.dsl import encode_src class TestMUBUF(unittest.TestCase): """Test MUBUF (buffer) instructions.""" @@ -308,7 +308,7 @@ class TestVOP3Literal(unittest.TestCase): def test_vop3_with_literal(self): # v_add3_u32 v5, vcc_hi, 0xaf123456, v255 # GFX11: encoding: [0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] - from extra.assembly.amd.lib import RawImm + from extra.assembly.amd.dsl import RawImm inst = VOP3(VOP3Op.V_ADD3_U32, vdst=v[5], src0=RawImm(107), src1=0xaf123456, src2=v[255]) expected = bytes([0x05,0x00,0x55,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf]) self.assertEqual(inst.to_bytes(), expected) @@ -316,14 +316,14 @@ class TestVOP3Literal(unittest.TestCase): def test_vop3_literal_null_operand(self): # v_add3_u32 v5, null, exec_lo, 0xaf123456 # GFX11: encoding: [0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf] - from extra.assembly.amd.lib import RawImm + from extra.assembly.amd.dsl import RawImm inst = VOP3(VOP3Op.V_ADD3_U32, vdst=v[5], src0=NULL, src1=RawImm(126), src2=0xaf123456) expected = bytes([0x05,0x00,0x55,0xd6,0x7c,0xfc,0xfc,0x03,0x56,0x34,0x12,0xaf]) self.assertEqual(inst.to_bytes(), expected) def test_vop3p_with_literal(self): # Test VOP3P literal encoding (also uses Inst64) - from extra.assembly.amd.lib import RawImm + from extra.assembly.amd.dsl import RawImm inst = VOP3P(VOP3POp.V_PK_ADD_F16, vdst=v[5], src0=RawImm(240), src1=0x12345678, src2=v[0]) self.assertEqual(len(inst.to_bytes()), 12) # 8 bytes + 4 byte literal diff --git a/extra/assembly/amd/test/test_handwritten.py b/extra/assembly/amd/test/test_handwritten.py index b075ea006a..cfe52dd52b 100644 --- a/extra/assembly/amd/test/test_handwritten.py +++ b/extra/assembly/amd/test/test_handwritten.py @@ -3,7 +3,7 @@ import unittest, struct from extra.assembly.amd.autogen.rdna3 
import * -from extra.assembly.amd.lib import Inst +from extra.assembly.amd.dsl import Inst from extra.assembly.amd.asm import asm from extra.assembly.amd.test.test_roundtrip import compile_asm diff --git a/extra/assembly/amd/test/test_pdf_parser.py b/extra/assembly/amd/test/test_pdf_parser.py index 8158ed9651..92a565213a 100644 --- a/extra/assembly/amd/test/test_pdf_parser.py +++ b/extra/assembly/amd/test/test_pdf_parser.py @@ -41,7 +41,7 @@ class TestPDFParserGenerate(unittest.TestCase): def test_pdf_parser(self): """Single test that validates all PDF parser outputs.""" - from extra.assembly.amd.lib import generate + from extra.assembly.amd.dsl import generate result = generate() # test_all_formats_present diff --git a/extra/assembly/amd/test/test_roundtrip.py b/extra/assembly/amd/test/test_roundtrip.py index 938ceb48aa..7fe12d9855 100644 --- a/extra/assembly/amd/test/test_roundtrip.py +++ b/extra/assembly/amd/test/test_roundtrip.py @@ -2,7 +2,7 @@ """Roundtrip tests: generate tinygrad kernels, decode instructions, re-encode, verify match.""" import unittest, io, sys, re, subprocess, os from extra.assembly.amd.autogen.rdna3 import * -from extra.assembly.amd.lib import Inst +from extra.assembly.amd.dsl import Inst from extra.assembly.amd.asm import asm from extra.assembly.amd.test.helpers import get_llvm_mc, get_llvm_objdump