From a19d21ea9ce454cd4e88925db238980c0068ad57 Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Tue, 30 Dec 2025 16:44:17 +0300 Subject: [PATCH 1/8] am: mi3xx smu clocks (#13894) * am: mi3xx smu clocks * x --- tinygrad/runtime/support/am/ip.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tinygrad/runtime/support/am/ip.py b/tinygrad/runtime/support/am/ip.py index 59e0fa4301..76140912a2 100644 --- a/tinygrad/runtime/support/am/ip.py +++ b/tinygrad/runtime/support/am/ip.py @@ -189,16 +189,18 @@ class AM_SMU(AM_IP): return table_t.from_buffer(bytearray(self.adev.vram.view(self.driver_table_paddr, ctypes.sizeof(table_t))[:])) def set_clocks(self, level): - if self.adev.ip_ver[am.MP0_HWIP] in {(13,0,6), (13,0,12)}: return # TODO - if not hasattr(self, 'clcks'): + clks = [self.smu_mod.PPCLK_UCLK, self.smu_mod.PPCLK_FCLK, self.smu_mod.PPCLK_SOCCLK] + if self.adev.ip_ver[am.MP0_HWIP] not in {(13,0,6), (13,0,12)}: clks.append(self.smu_mod.PPCLK_GFXCLK) + self.clcks = {} - for clck in [self.smu_mod.PPCLK_GFXCLK, self.smu_mod.PPCLK_UCLK, self.smu_mod.PPCLK_FCLK, self.smu_mod.PPCLK_SOCCLK]: + for clck in clks: cnt = self._send_msg(self.smu_mod.PPSMC_MSG_GetDpmFreqByIndex, (clck<<16)|0xff, read_back_arg=True)&0x7fffffff self.clcks[clck] = [self._send_msg(self.smu_mod.PPSMC_MSG_GetDpmFreqByIndex, (clck<<16)|i, read_back_arg=True)&0x7fffffff for i in range(cnt)] for clck, vals in self.clcks.items(): - self._send_msg(self.smu_mod.PPSMC_MSG_SetSoftMinByFreq, clck << 16 | (vals[level])) + if not vals: continue + with contextlib.suppress(TimeoutError): self._send_msg(self.smu_mod.PPSMC_MSG_SetSoftMinByFreq, clck << 16 | (vals[level]), timeout=20) self._send_msg(self.smu_mod.PPSMC_MSG_SetSoftMaxByFreq, clck << 16 | (vals[level])) def _smu_cmn_send_msg(self, msg:int, param=0, debug=False): From 2b838dc1d8fd8d6874cd6dd9cfffd3442ef68115 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Tue, 30 Dec 2025 09:09:57 -0500 Subject: [PATCH 2/8] assembly/amd: fix AMD_LLVM=1 support in emulator (#13881) * fix AMD_LLVM=1 support in emulator * more llvm with dtype * work * more fixes * fix dtype --- .github/workflows/test.yml | 2 + extra/assembly/amd/asm.py | 21 +- extra/assembly/amd/dsl.py | 3 + extra/assembly/amd/emu.py | 174 ++- extra/assembly/amd/pcode.py | 29 +- .../amd/test/test_compare_emulators.py | 4 + extra/assembly/amd/test/test_emu.py | 1026 +++++++++++++++++ extra/assembly/amd/test/test_roundtrip.py | 7 +- 8 files changed, 1227 insertions(+), 39 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b77fd7068e..657a85a077 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -677,6 +677,8 @@ jobs: run: cloc --by-file extra/assembly/amd/*.py - name: Run RDNA3 emulator tests run: python -m pytest -n=auto extra/assembly/amd/ --durations 20 + - name: Run RDNA3 emulator tests (AMD_LLVM=1) + run: AMD_LLVM=1 python -m pytest -n=auto extra/assembly/amd/ --durations 20 - name: Install pdfplumber run: pip install pdfplumber - name: Verify AMD autogen is up to date diff --git a/extra/assembly/amd/asm.py b/extra/assembly/amd/asm.py index a8a5f55f54..3496795dc9 100644 --- a/extra/assembly/amd/asm.py +++ b/extra/assembly/amd/asm.py @@ -219,9 +219,12 @@ def disasm(inst: Inst) -> str: src2_str = fmt_sd_src(src2, neg & 4, is_mad64) dst_str = _vreg(vdst, 2) if (is_f64 or is_mad64) else f"v{vdst}" sdst_str = _fmt_sdst(sdst, 1) - # v_add_co_u32, v_sub_co_u32, 
v_subrev_co_u32, v_add_co_ci_u32, etc. only use 2 sources - if op_name in ('v_add_co_u32', 'v_sub_co_u32', 'v_subrev_co_u32', 'v_add_co_ci_u32', 'v_sub_co_ci_u32', 'v_subrev_co_ci_u32'): + # v_add_co_u32, v_sub_co_u32, v_subrev_co_u32 only use 2 sources + if op_name in ('v_add_co_u32', 'v_sub_co_u32', 'v_subrev_co_u32'): return f"{op_name} {dst_str}, {sdst_str}, {src0_str}, {src1_str}" + # v_add_co_ci_u32, v_sub_co_ci_u32, v_subrev_co_ci_u32 use 3 sources (src2 is carry-in) + if op_name in ('v_add_co_ci_u32', 'v_sub_co_ci_u32', 'v_subrev_co_ci_u32'): + return f"{op_name} {dst_str}, {sdst_str}, {src0_str}, {src1_str}, {src2_str}" # v_div_scale uses 3 sources return f"{op_name} {dst_str}, {sdst_str}, {src0_str}, {src1_str}, {src2_str}" + omod_str @@ -351,12 +354,17 @@ def disasm(inst: Inst) -> str: from extra.assembly.amd.autogen import rdna3 as autogen opx, opy, vdstx, vdsty_enc = [unwrap(inst._values.get(f, 0)) for f in ('opx', 'opy', 'vdstx', 'vdsty')] srcx0, vsrcx1, srcy0, vsrcy1 = [unwrap(inst._values.get(f, 0)) for f in ('srcx0', 'vsrcx1', 'srcy0', 'vsrcy1')] + literal = inst._literal if hasattr(inst, '_literal') and inst._literal else unwrap(inst._values.get('literal', None)) vdsty = (vdsty_enc << 1) | ((vdstx & 1) ^ 1) # Decode vdsty - def fmt_vopd(op, vdst, src0, vsrc1): + def fmt_vopd(op, vdst, src0, vsrc1, include_lit): try: name = autogen.VOPDOp(op).name.lower() except (ValueError, KeyError): name = f"op_{op}" - return f"{name} v{vdst}, {fmt_src(src0)}" if 'mov' in name else f"{name} v{vdst}, {fmt_src(src0)}, v{vsrc1}" - return f"{fmt_vopd(opx, vdstx, srcx0, vsrcx1)} :: {fmt_vopd(opy, vdsty, srcy0, vsrcy1)}" + lit_str = f", 0x{literal:x}" if include_lit and literal is not None and ('fmaak' in name or 'fmamk' in name) else "" + return f"{name} v{vdst}, {fmt_src(src0)}{lit_str}" if 'mov' in name else f"{name} v{vdst}, {fmt_src(src0)}, v{vsrc1}{lit_str}" + # fmaak/fmamk: both X and Y can use the shared literal + x_needs_lit = 'fmaak' in autogen.VOPDOp(opx).name.lower() or 'fmamk' in autogen.VOPDOp(opx).name.lower() + y_needs_lit = 'fmaak' in autogen.VOPDOp(opy).name.lower() or 'fmamk' in autogen.VOPDOp(opy).name.lower() + return f"{fmt_vopd(opx, vdstx, srcx0, vsrcx1, x_needs_lit)} :: {fmt_vopd(opy, vdsty, srcy0, vsrcy1, y_needs_lit)}" # VOP3P: packed vector ops if cls_name == 'VOP3P': @@ -721,6 +729,9 @@ def get_dsl(text: str) -> str: if mnemonic.replace('_e32', '') in vcc_ops and len(dsl_args) >= 5: mnemonic = mnemonic.replace('_e32', '') + '_e32' # Ensure _e32 suffix for VOP2 encoding dsl_args = [dsl_args[0], dsl_args[2], dsl_args[3]] + # Handle v_add_co_ci_u32_e64 etc - strip _e64 suffix (function name doesn't have it, returns VOP3SD) + if mnemonic.replace('_e64', '') in vcc_ops and mnemonic.endswith('_e64'): + mnemonic = mnemonic.replace('_e64', '') # v_cmp_*_e32: strip implicit vcc_lo dest if mnemonic.startswith('v_cmp') and not mnemonic.endswith('_e64') and len(dsl_args) >= 3 and operands[0].strip().lower() in ('vcc_lo', 'vcc_hi', 'vcc'): dsl_args = dsl_args[1:] diff --git a/extra/assembly/amd/dsl.py b/extra/assembly/amd/dsl.py index ae62c2fcee..615597e81b 100644 --- a/extra/assembly/amd/dsl.py +++ b/extra/assembly/amd/dsl.py @@ -315,6 +315,9 @@ class Inst: op_val = inst._values.get('op', 0) has_literal = cls.__name__ == 'VOP2' and op_val in (44, 45, 55, 56) has_literal = has_literal or (cls.__name__ == 'SOP2' and op_val in (69, 70)) + # VOPD fmaak/fmamk always have a literal (opx/opy value 1 or 2) + opx, opy = inst._values.get('opx', 0), inst._values.get('opy', 0) + 
has_literal = has_literal or (cls.__name__ == 'VOPD' and (opx in (1, 2) or opy in (1, 2))) for n in SRC_FIELDS: if n in inst._values and isinstance(inst._values[n], RawImm) and inst._values[n].val == 255: has_literal = True if has_literal: diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index cce704c55f..dbbd33b820 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -24,12 +24,18 @@ _VOP3_64BIT_OPS_32BIT_SRC1 = {VOP3Op.V_LDEXP_F64.value} _VOP3_16BIT_OPS = {op for op in VOP3Op if any(s in op.name for s in ('_F16', '_B16', '_I16', '_U16')) and 'SAD' not in op.name} _VOP1_16BIT_OPS = {op for op in VOP1Op if any(s in op.name for s in ('_F16', '_B16', '_I16', '_U16'))} _VOP2_16BIT_OPS = {op for op in VOP2Op if any(s in op.name for s in ('_F16', '_B16', '_I16', '_U16'))} +_VOPC_16BIT_OPS = {op for op in VOPCOp if any(s in op.name for s in ('_F16', '_B16', '_I16', '_U16'))} # CVT ops with 32/64-bit source (despite 16-bit in name) _CVT_32_64_SRC_OPS = {op for op in VOP3Op if op.name.startswith('V_CVT_') and op.name.endswith(('_F32', '_I32', '_U32', '_F64', '_I64', '_U64'))} | \ {op for op in VOP1Op if op.name.startswith('V_CVT_') and op.name.endswith(('_F32', '_I32', '_U32', '_F64', '_I64', '_U64'))} -# 16-bit dst ops (PACK has 32-bit dst despite F16 in name) -_VOP3_16BIT_DST_OPS = {op for op in _VOP3_16BIT_OPS if 'PACK' not in op.name} -_VOP1_16BIT_DST_OPS = {op for op in _VOP1_16BIT_OPS if 'PACK' not in op.name} +# CVT ops with 32-bit destination (convert FROM 16-bit TO 32-bit): V_CVT_F32_F16, V_CVT_I32_I16, V_CVT_U32_U16 +_CVT_32_DST_OPS = {op for op in VOP3Op if op.name.startswith('V_CVT_') and any(s in op.name for s in ('F32_F16', 'I32_I16', 'U32_U16', 'I32_F16', 'U32_F16'))} | \ + {op for op in VOP1Op if op.name.startswith('V_CVT_') and any(s in op.name for s in ('F32_F16', 'I32_I16', 'U32_U16', 'I32_F16', 'U32_F16'))} +# 16-bit dst ops (PACK has 32-bit dst despite F16 in name, CVT to 32-bit has 32-bit dst) +_VOP3_16BIT_DST_OPS = {op for op in _VOP3_16BIT_OPS if 'PACK' not in op.name} - _CVT_32_DST_OPS +_VOP1_16BIT_DST_OPS = {op for op in _VOP1_16BIT_OPS if 'PACK' not in op.name} - _CVT_32_DST_OPS +# VOP1 16-bit source ops (excluding CVT ops with 32/64-bit source) - for VOP1 e32, .h encoded in register index +_VOP1_16BIT_SRC_OPS = _VOP1_16BIT_OPS - _CVT_32_64_SRC_OPS # Inline constants for src operands 128-254. Build tables for f32, f16, and f64 formats. 
import struct as _struct @@ -371,11 +377,25 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No # Get op enum and sources (None means "no source" for that operand) + # vop1_dst_hi/vop2_dst_hi: for VOP1/VOP2 16-bit dst ops, bit 7 of vdst indicates .h (high 16-bit) destination + vop1_dst_hi, vop2_dst_hi = False, False if inst_type is VOP1: if inst.op == VOP1Op.V_NOP: return - op_cls, op, src0, src1, src2, vdst = VOP1Op, VOP1Op(inst.op), inst.src0, None, None, inst.vdst + op_cls, op, src0, src1, src2 = VOP1Op, VOP1Op(inst.op), inst.src0, None, None + # For 16-bit dst ops, vdst encodes .h in bit 7 + if op in _VOP1_16BIT_DST_OPS: + vop1_dst_hi = (inst.vdst & 0x80) != 0 + vdst = inst.vdst & 0x7f + else: + vdst = inst.vdst elif inst_type is VOP2: - op_cls, op, src0, src1, src2, vdst = VOP2Op, VOP2Op(inst.op), inst.src0, inst.vsrc1 + 256, None, inst.vdst + op_cls, op, src0, src1, src2 = VOP2Op, VOP2Op(inst.op), inst.src0, inst.vsrc1 + 256, None + # For 16-bit dst ops, vdst encodes .h in bit 7 + if op in _VOP2_16BIT_OPS: + vop2_dst_hi = (inst.vdst & 0x80) != 0 + vdst = inst.vdst & 0x7f + else: + vdst = inst.vdst elif inst_type is VOP3: # VOP3 ops 0-255 are VOPC comparisons encoded as VOP3 (use VOPCOp pseudocode) if inst.op < 256: @@ -397,7 +417,11 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No V[vdst] = result & 0xffffffff return elif inst_type is VOPC: - op_cls, op, src0, src1, src2, vdst = VOPCOp, VOPCOp(inst.op), inst.src0, inst.vsrc1 + 256, None, VCC_LO + op = VOPCOp(inst.op) + # For 16-bit VOPC, vsrc1 uses same encoding as VOP2 16-bit: bit 7 selects hi(1) or lo(0) half + # vsrc1 field is 8 bits: [6:0] = VGPR index, [7] = hi flag + src1 = inst.vsrc1 + 256 # convert to standard VGPR encoding (256 + vgpr_idx) + op_cls, src0, src2, vdst = VOPCOp, inst.src0, None, VCC_LO elif inst_type is VOP3P: # VOP3P: Packed 16-bit operations using compiled functions op = VOP3POp(inst.op) @@ -406,26 +430,44 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No if lane == 0: # Only execute once per wave, write results for all lanes exec_wmma(st, inst, op) return - # V_FMA_MIX: Mixed precision FMA - inputs can be f16 or f32 controlled by opsel + # V_FMA_MIX: Mixed precision FMA - inputs can be f16 or f32 controlled by opsel_hi/opsel_hi2 + # opsel_hi[0]: src0 is f32 (0) or f16 from hi bits (1) + # opsel_hi[1]: src1 is f32 (0) or f16 from hi bits (1) + # opsel_hi2: src2 is f32 (0) or f16 from hi bits (1) + # opsel[i]: when source is f16, use lo (0) or hi (1) 16 bits - BUT for V_FMA_MIX, opsel selects lo/hi when opsel_hi=1 + # neg_hi[i]: abs modifier for source i (reuses neg_hi field for abs in V_FMA_MIX) if op in (VOP3POp.V_FMA_MIX_F32, VOP3POp.V_FMA_MIXLO_F16, VOP3POp.V_FMA_MIXHI_F16): opsel = getattr(inst, 'opsel', 0) opsel_hi = getattr(inst, 'opsel_hi', 0) + opsel_hi2 = getattr(inst, 'opsel_hi2', 0) neg = getattr(inst, 'neg', 0) - neg_hi = getattr(inst, 'neg_hi', 0) + abs_ = getattr(inst, 'neg_hi', 0) # neg_hi field is reused as abs for V_FMA_MIX vdst = inst.vdst - # Read raw 32-bit values - for V_FMA_MIX, sources can be either f32 or f16 + # Read raw 32-bit values s0_raw = st.rsrc(inst.src0, lane) s1_raw = st.rsrc(inst.src1, lane) s2_raw = st.rsrc(inst.src2, lane) if inst.src2 is not None else 0 - # opsel[i]=0: use as f32, opsel[i]=1: use hi f16 as f32 - # For src0: opsel[0], for src1: opsel[1], for src2: opsel[2] - if opsel & 1: s0 = _f16((s0_raw >> 16) & 0xffff) # hi f16 -> f32 - else: s0 = _f32(s0_raw) # use as 
f32 - if opsel & 2: s1 = _f16((s1_raw >> 16) & 0xffff) - else: s1 = _f32(s1_raw) - if opsel & 4: s2 = _f16((s2_raw >> 16) & 0xffff) - else: s2 = _f32(s2_raw) - # Apply neg modifiers (for f32 values) + # Decode sources based on opsel_hi (controls f32 vs f16) and opsel (controls which half for f16) + # src0: opsel_hi[0]=1 means f16, opsel[0] selects hi(1) or lo(0) half + if opsel_hi & 1: + s0 = _f16((s0_raw >> 16) & 0xffff) if (opsel & 1) else _f16(s0_raw & 0xffff) + else: + s0 = _f32(s0_raw) + # src1: opsel_hi[1]=1 means f16, opsel[1] selects hi(1) or lo(0) half + if opsel_hi & 2: + s1 = _f16((s1_raw >> 16) & 0xffff) if (opsel & 2) else _f16(s1_raw & 0xffff) + else: + s1 = _f32(s1_raw) + # src2: opsel_hi2=1 means f16, opsel[2] selects hi(1) or lo(0) half + if opsel_hi2: + s2 = _f16((s2_raw >> 16) & 0xffff) if (opsel & 4) else _f16(s2_raw & 0xffff) + else: + s2 = _f32(s2_raw) + # Apply abs modifiers (abs_ field reuses neg_hi position) + if abs_ & 1: s0 = abs(s0) + if abs_ & 2: s1 = abs(s1) + if abs_ & 4: s2 = abs(s2) + # Apply neg modifiers if neg & 1: s0 = -s0 if neg & 2: s1 = -s1 if neg & 4: s2 = -s2 @@ -505,7 +547,7 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No is_ldexp_64 = op in (VOP3Op.V_LDEXP_F64,) is_shift_64 = op in (VOP3Op.V_LSHLREV_B64, VOP3Op.V_LSHRREV_B64, VOP3Op.V_ASHRREV_I64) # 16-bit source ops: use precomputed sets instead of string checks - has_16bit_type = op in _VOP3_16BIT_OPS or op in _VOP1_16BIT_OPS or op in _VOP2_16BIT_OPS + # Note: must check op_cls to avoid cross-enum value collisions is_16bit_src = op_cls is VOP3Op and op in _VOP3_16BIT_OPS and op not in _CVT_32_64_SRC_OPS # VOP2 16-bit ops use f16 inline constants for src0 (vsrc1 is always a VGPR, no inline constants) is_vop2_16bit = op_cls is VOP2Op and op in _VOP2_16BIT_OPS @@ -525,27 +567,88 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No s2 = mod_src64(st.rsrc64(src2, lane), 2) if src2 is not None else 0 elif is_16bit_src: # For 16-bit source ops, opsel bits select which half to use - s0_raw = mod_src(st.rsrc(src0, lane), 0) - s1_raw = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 - s2_raw = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 + # Inline constants (128-254) must use f16 encoding, not f32 + def rsrc_16bit(src, lane): return st.rsrc_f16(src, lane) if 128 <= src < 255 else st.rsrc(src, lane) + s0_raw = rsrc_16bit(src0, lane) + s1_raw = rsrc_16bit(src1, lane) if src1 is not None else 0 + s2_raw = rsrc_16bit(src2, lane) if src2 is not None else 0 # opsel[0] selects hi(1) or lo(0) for src0, opsel[1] for src1, opsel[2] for src2 s0 = ((s0_raw >> 16) & 0xffff) if (opsel & 1) else (s0_raw & 0xffff) s1 = ((s1_raw >> 16) & 0xffff) if (opsel & 2) else (s1_raw & 0xffff) s2 = ((s2_raw >> 16) & 0xffff) if (opsel & 4) else (s2_raw & 0xffff) + # Apply abs/neg modifiers as f16 operations (toggle sign bit 15) + if abs_ & 1: s0 &= 0x7fff + if abs_ & 2: s1 &= 0x7fff + if abs_ & 4: s2 &= 0x7fff + if neg & 1: s0 ^= 0x8000 + if neg & 2: s1 ^= 0x8000 + if neg & 4: s2 ^= 0x8000 elif is_vop2_16bit: - # VOP2 16-bit ops: src0 can use f16 inline constants, vsrc1 is always a VGPR (no inline constants) - s0 = mod_src(st.rsrc_f16(src0, lane), 0) - s1 = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 + # VOP2 16-bit ops: src0 uses f16 inline constants, or VGPR where v128+ = hi half of v0-v127 + # RDNA3 encoding: for VGPRs, bit 7 of VGPR index (src0-256) selects hi(1) or lo(0) half + if src0 >= 256: # VGPR + src0_hi = (src0 - 
256) & 0x80 != 0 + src0_masked = ((src0 - 256) & 0x7f) + 256 # mask out hi bit to get actual VGPR + s0_raw = mod_src(st.rsrc(src0_masked, lane), 0) + s0 = ((s0_raw >> 16) & 0xffff) if src0_hi else (s0_raw & 0xffff) + else: # SGPR or inline constant + s0_raw = mod_src(st.rsrc_f16(src0, lane), 0) + s0 = s0_raw & 0xffff + # vsrc1: .h suffix encoded in bit 7 of VGPR index (src1 = 256 + vgpr_idx + 0x80 if hi) + if src1 is not None: + src1_hi = (src1 - 256) & 0x80 != 0 + src1_masked = ((src1 - 256) & 0x7f) + 256 + s1_raw = mod_src(st.rsrc(src1_masked, lane), 1) + s1 = ((s1_raw >> 16) & 0xffff) if src1_hi else (s1_raw & 0xffff) + else: + s1 = 0 s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 + elif op_cls is VOP1Op and op in _VOP1_16BIT_SRC_OPS: + # VOP1 16-bit source ops: .h encoded in bit 7 of VGPR index (src0 >= 384 means hi half) + # For VGPRs: src0 = 256 + vgpr_idx + (0x80 if hi else 0), so bit 7 of (src0-256) is the hi flag + src0_hi = src0 >= 256 and ((src0 - 256) & 0x80) != 0 + src0_masked = ((src0 - 256) & 0x7f) + 256 if src0 >= 256 else src0 # mask out hi bit for VGPR + s0_raw = mod_src(st.rsrc(src0_masked, lane), 0) + s0 = ((s0_raw >> 16) & 0xffff) if src0_hi else (s0_raw & 0xffff) + s1, s2 = 0, 0 + elif op_cls is VOPCOp and op in _VOPC_16BIT_OPS: + # VOPC 16-bit ops: src0 and vsrc1 use same encoding as VOP2 16-bit + # For VGPRs, bit 7 of VGPR index selects hi(1) or lo(0) half + if src0 >= 256: # VGPR + src0_hi = (src0 - 256) & 0x80 != 0 + src0_masked = ((src0 - 256) & 0x7f) + 256 + s0_raw = mod_src(st.rsrc(src0_masked, lane), 0) + s0 = ((s0_raw >> 16) & 0xffff) if src0_hi else (s0_raw & 0xffff) + else: # SGPR or inline constant + s0_raw = mod_src(st.rsrc_f16(src0, lane), 0) + s0 = s0_raw & 0xffff + # vsrc1: bit 7 of VGPR index selects hi(1) or lo(0) half + if src1 is not None: + if src1 >= 256: # VGPR - use hi/lo encoding + src1_hi = (src1 - 256) & 0x80 != 0 + src1_masked = ((src1 - 256) & 0x7f) + 256 + s1_raw = mod_src(st.rsrc(src1_masked, lane), 1) + s1 = ((s1_raw >> 16) & 0xffff) if src1_hi else (s1_raw & 0xffff) + else: # SGPR or inline constant - read as 32-bit, use low 16 bits + s1_raw = mod_src(st.rsrc(src1, lane), 1) + s1 = s1_raw & 0xffffffff # V_CMP_CLASS uses full 32-bit mask + else: + s1 = 0 + s2 = 0 else: s0 = mod_src(st.rsrc(src0, lane), 0) s1 = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 - d0 = V[vdst] if not is_64bit_op else (V[vdst] | (V[vdst + 1] << 32)) + # For VOP2 16-bit ops (like V_FMAC_F16), the destination is used as an accumulator. + # The pseudocode reads D0.f16 from low 16 bits, so we need to shift hi->lo when vop2_dst_hi is True. 
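+    # e.g. v_fmac_f16 v2.h, v0, v1 encodes vdst=0x82: vop2_dst_hi is set and vdst is masked to 2,
+    # so the accumulator below must come from v2[31:16], shifted down, rather than v2[15:0].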
+ if is_vop2_16bit: + d0 = ((V[vdst] >> 16) & 0xffff) if vop2_dst_hi else (V[vdst] & 0xffff) + else: + d0 = V[vdst] if not is_64bit_op else (V[vdst] | (V[vdst + 1] << 32)) - # V_CNDMASK_B32: VOP3 encoding uses src2 as mask (not VCC); VOP2 uses VCC implicitly + # V_CNDMASK_B32/B16: VOP3 encoding uses src2 as mask (not VCC); VOP2 uses VCC implicitly # Pass the correct mask as vcc to the function so pseudocode VCC.u64[laneId] works correctly - vcc_for_fn = st.rsgpr64(src2) if op in (VOP3Op.V_CNDMASK_B32,) and inst_type is VOP3 and src2 is not None and src2 < 256 else st.vcc + vcc_for_fn = st.rsgpr64(src2) if op in (VOP3Op.V_CNDMASK_B32, VOP3Op.V_CNDMASK_B16) and inst_type is VOP3 and src2 is not None and src2 < 256 else st.vcc # Execute compiled function - pass src0_idx and vdst_idx for lane instructions # For VGPR access: src0 index is the VGPR number (src0 - 256 if VGPR, else src0 for SGPR) @@ -571,7 +674,8 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No writes_to_sgpr = op in (VOP1Op.V_READFIRSTLANE_B32,) or \ (op_cls is VOP3Op and op in (VOP3Op.V_READFIRSTLANE_B32, VOP3Op.V_READLANE_B32)) # Check for 16-bit destination ops (opsel[3] controls hi/lo write) - is_16bit_dst = op in _VOP3_16BIT_DST_OPS or op in _VOP1_16BIT_DST_OPS + # Must check op_cls to avoid cross-enum value collisions (e.g., VOP1Op.V_MOV_B32=1 vs VOP3Op.V_CMP_LT_F16=1) + is_16bit_dst = (op_cls is VOP3Op and op in _VOP3_16BIT_DST_OPS) or (op_cls is VOP1Op and op in _VOP1_16BIT_DST_OPS) if writes_to_sgpr: st.wsgpr(vdst, result['d0'] & 0xffffffff) elif result.get('d0_64') or is_64bit_op: @@ -583,6 +687,18 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No V[vdst] = (V[vdst] & 0x0000ffff) | ((result['d0'] & 0xffff) << 16) else: # opsel[3] = 0: write to low 16 bits V[vdst] = (V[vdst] & 0xffff0000) | (result['d0'] & 0xffff) + elif is_16bit_dst and inst_type is VOP1: + # VOP1 16-bit ops: .h suffix encoded in bit 7 of vdst (extracted as vop1_dst_hi) + if vop1_dst_hi: # .h: write to high 16 bits + V[vdst] = (V[vdst] & 0x0000ffff) | ((result['d0'] & 0xffff) << 16) + else: # .l: write to low 16 bits + V[vdst] = (V[vdst] & 0xffff0000) | (result['d0'] & 0xffff) + elif is_vop2_16bit: + # VOP2 16-bit ops: .h suffix encoded in bit 7 of vdst (extracted as vop2_dst_hi) + if vop2_dst_hi: # .h: write to high 16 bits + V[vdst] = (V[vdst] & 0x0000ffff) | ((result['d0'] & 0xffff) << 16) + else: # .l: write to low 16 bits + V[vdst] = (V[vdst] & 0xffff0000) | (result['d0'] & 0xffff) else: V[vdst] = result['d0'] & 0xffffffff diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py index dcf96a5a99..05b23d7528 100644 --- a/extra/assembly/amd/pcode.py +++ b/extra/assembly/amd/pcode.py @@ -35,12 +35,18 @@ def _isnan(x): try: return math.isnan(float(x)) except (TypeError, ValueError): return False def _isquietnan(x): - """Check if x is a quiet NaN. For f32: exponent=255, bit22=1, mantissa!=0""" + """Check if x is a quiet NaN. 
+ f16: exponent=31, bit9=1, mantissa!=0 + f32: exponent=255, bit22=1, mantissa!=0 + f64: exponent=2047, bit51=1, mantissa!=0 + """ try: if not math.isnan(float(x)): return False # Get raw bits from TypedView or similar object with _reg attribute if hasattr(x, '_reg') and hasattr(x, '_bits'): bits = x._reg._val & ((1 << x._bits) - 1) + if x._bits == 16: + return ((bits >> 10) & 0x1f) == 31 and ((bits >> 9) & 1) == 1 and (bits & 0x3ff) != 0 if x._bits == 32: return ((bits >> 23) & 0xff) == 255 and ((bits >> 22) & 1) == 1 and (bits & 0x7fffff) != 0 if x._bits == 64: @@ -48,12 +54,18 @@ def _isquietnan(x): return True # Default to quiet NaN if we can't determine bit pattern except (TypeError, ValueError): return False def _issignalnan(x): - """Check if x is a signaling NaN. For f32: exponent=255, bit22=0, mantissa!=0""" + """Check if x is a signaling NaN. + f16: exponent=31, bit9=0, mantissa!=0 + f32: exponent=255, bit22=0, mantissa!=0 + f64: exponent=2047, bit51=0, mantissa!=0 + """ try: if not math.isnan(float(x)): return False # Get raw bits from TypedView or similar object with _reg attribute if hasattr(x, '_reg') and hasattr(x, '_bits'): bits = x._reg._val & ((1 << x._bits) - 1) + if x._bits == 16: + return ((bits >> 10) & 0x1f) == 31 and ((bits >> 9) & 1) == 0 and (bits & 0x3ff) != 0 if x._bits == 32: return ((bits >> 23) & 0xff) == 255 and ((bits >> 22) & 1) == 0 and (bits & 0x7fffff) != 0 if x._bits == 64: @@ -73,7 +85,11 @@ def floor(x): def ceil(x): x = float(x) return x if math.isnan(x) or math.isinf(x) else float(math.ceil(x)) -def sqrt(x): return math.sqrt(x) if x >= 0 else float("nan") +class _SafeFloat(float): + """Float subclass that uses _div for division to handle 0/inf correctly.""" + def __truediv__(self, o): return _div(float(self), float(o)) + def __rtruediv__(self, o): return _div(float(o), float(self)) +def sqrt(x): return _SafeFloat(math.sqrt(x)) if x >= 0 else _SafeFloat(float("nan")) def log2(x): return math.log2(x) if x > 0 else (float("-inf") if x == 0 else float("nan")) i32_to_f32 = u32_to_f32 = i32_to_f64 = u32_to_f64 = f32_to_f64 = f64_to_f32 = float def f32_to_i32(f): @@ -107,7 +123,10 @@ def u4_to_u32(v): return int(v) & 0xf def _sign(f): return 1 if math.copysign(1.0, f) < 0 else 0 def _mantissa_f32(f): return struct.unpack("> 16) & 0xffff + self.assertAlmostEqual(lo, 7.0, places=1, msg=f"lo: 2*3+1=7, got {lo}") + self.assertEqual(hi, 0xdead, f"hi should be preserved, got 0x{hi:04x}") + + class TestF64Conversions(unittest.TestCase): """Tests for 64-bit float operations and conversions.""" @@ -2598,5 +2696,933 @@ class TestQuadmaskWqm(unittest.TestCase): self.assertEqual(st.scc, 0, "SCC should be 0 (result == 0)") +class TestVOP2_16bit_HiHalf(unittest.TestCase): + """Regression tests for VOP2 16-bit ops reading from high half of VGPR (v128+ encoding). + + Bug: VOP2 16-bit ops like v_add_f16 with src0 as v128+ should read the HIGH 16 bits + of the corresponding VGPR (v128 = v0.hi, v129 = v1.hi, etc). The emulator was + incorrectly reading from VGPR v128+ instead of the high half of v0+. + + Example: v_add_f16 v0, v128, v0 means v0.lo = v0.hi + v0.lo (fold packed result) + """ + + def test_v_add_f16_src0_hi_fold(self): + """v_add_f16 with src0=v128 (v0.hi) - fold packed f16 values. + + This pattern is generated by LLVM for summing packed f16 results: + v_pk_mul_f16 produces [hi, lo] in v0, then v_add_f16 v0, v128, v0 sums them. 
+ """ + instructions = [ + # v0 = packed f16: high=2.0 (0x4000), low=1.0 (0x3c00) + s_mov_b32(s[0], 0x40003c00), + v_mov_b32_e32(v[0], s[0]), + # v_add_f16 v1, v128, v0 means: v1.lo = v0.hi + v0.lo = 2.0 + 1.0 = 3.0 + # v128 in src0 means "read high 16 bits of v0" + v_add_f16_e32(v[1], v[0].h, v[0]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] & 0xffff + self.assertEqual(result, 0x4200, f"Expected 3.0 (0x4200), got 0x{result:04x}") + + def test_v_add_f16_src0_hi_different_reg(self): + """v_add_f16 with src0=v129 (v1.hi) reads high half of v1.""" + instructions = [ + s_mov_b32(s[0], 0x44004200), # v1: high=4.0, low=3.0 + v_mov_b32_e32(v[1], s[0]), + s_mov_b32(s[1], 0x3c00), # v0: low=1.0 + v_mov_b32_e32(v[0], s[1]), + # v_add_f16 v2, v129, v0 means: v2.lo = v1.hi + v0.lo = 4.0 + 1.0 = 5.0 + v_add_f16_e32(v[2], v[1].h, v[0]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xffff + self.assertEqual(result, 0x4500, f"Expected 5.0 (0x4500), got 0x{result:04x}") + + def test_v_mul_f16_src0_hi(self): + """v_mul_f16 with src0 from high half.""" + instructions = [ + s_mov_b32(s[0], 0x40003c00), # v0: high=2.0, low=1.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x4200), # v1: low=3.0 + v_mov_b32_e32(v[1], s[1]), + # v_mul_f16 v2, v128, v1 means: v2.lo = v0.hi * v1.lo = 2.0 * 3.0 = 6.0 + v_mul_f16_e32(v[2], v[0].h, v[1]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xffff + self.assertEqual(result, 0x4600, f"Expected 6.0 (0x4600), got 0x{result:04x}") + + def test_v_add_f16_multilane(self): + """v_add_f16 with src0=v128 across multiple lanes.""" + instructions = [ + # Set up different packed values per lane using v_mov with lane-dependent values + # Lane 0: v0 = 0x40003c00 (hi=2.0, lo=1.0) -> sum = 3.0 + # Lane 1: v0 = 0x44004200 (hi=4.0, lo=3.0) -> sum = 7.0 + v_mov_b32_e32(v[0], 0x40003c00), # default for all lanes + # Use v_cmp to select lane 1 (v255 = lane_id from prologue) + v_cmp_eq_u32_e32(1, v[255]), # vcc = (lane == 1) + v_cndmask_b32_e64(v[0], v[0], 0x44004200, SrcEnum.VCC_LO), + # Now fold: v1.lo = v0.hi + v0.lo + v_add_f16_e32(v[1], v[0].h, v[0]), + ] + st = run_program(instructions, n_lanes=2) + # Lane 0: 2.0 + 1.0 = 3.0 (0x4200) + self.assertEqual(st.vgpr[0][1] & 0xffff, 0x4200, "Lane 0: expected 3.0") + # Lane 1: 4.0 + 3.0 = 7.0 (0x4700) + self.assertEqual(st.vgpr[1][1] & 0xffff, 0x4700, "Lane 1: expected 7.0") + + +class TestVOPC_16bit_HiHalf(unittest.TestCase): + """Regression tests for VOPC 16-bit ops reading from high half of VGPR (v128+ encoding). + + Bug: VOPC 16-bit ops like v_cmp_lt_f16 with vsrc1 as v128+ should read the HIGH 16 bits + of the corresponding VGPR. The emulator was incorrectly reading from VGPR v128+. 
+ + Example: v_cmp_nge_f16 vcc, v0, v128 compares v0.lo with v0.hi + """ + + def test_v_cmp_lt_f16_vsrc1_hi(self): + """v_cmp_lt_f16 comparing low half with high half of same register.""" + instructions = [ + # v0: high=2.0 (0x4000), low=1.0 (0x3c00) + s_mov_b32(s[0], 0x40003c00), + v_mov_b32_e32(v[0], s[0]), + # v_cmp_lt_f16 vcc, v0, v128 means: vcc = (v0.lo < v0.hi) = (1.0 < 2.0) = true + v_cmp_lt_f16_e32(v[0], v[0].h), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (1.0 < 2.0)") + + def test_v_cmp_gt_f16_vsrc1_hi(self): + """v_cmp_gt_f16 with vsrc1 from high half.""" + instructions = [ + # v0: high=1.0 (0x3c00), low=2.0 (0x4000) + s_mov_b32(s[0], 0x3c004000), + v_mov_b32_e32(v[0], s[0]), + # v_cmp_gt_f16 vcc, v0, v128 means: vcc = (v0.lo > v0.hi) = (2.0 > 1.0) = true + v_cmp_gt_f16_e32(v[0], v[0].h), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (2.0 > 1.0)") + + def test_v_cmp_eq_f16_vsrc1_hi_equal(self): + """v_cmp_eq_f16 with equal low and high halves.""" + instructions = [ + # v0: high=3.0 (0x4200), low=3.0 (0x4200) + s_mov_b32(s[0], 0x42004200), + v_mov_b32_e32(v[0], s[0]), + # v_cmp_eq_f16 vcc, v0, v128 means: vcc = (v0.lo == v0.hi) = (3.0 == 3.0) = true + v_cmp_eq_f16_e32(v[0], v[0].h), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (3.0 == 3.0)") + + def test_v_cmp_neq_f16_vsrc1_hi(self): + """v_cmp_neq_f16 with different low and high halves.""" + instructions = [ + # v0: high=2.0 (0x4000), low=1.0 (0x3c00) + s_mov_b32(s[0], 0x40003c00), + v_mov_b32_e32(v[0], s[0]), + # v_cmp_neq_f16 vcc, v0, v128 means: vcc = (v0.lo != v0.hi) = (1.0 != 2.0) = true + v_cmp_lg_f16_e32(v[0], v[0].h), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "Expected vcc=1 (1.0 != 2.0)") + + def test_v_cmp_nge_f16_inf_self(self): + """v_cmp_nge_f16 comparing -inf with itself (unordered less than). + + Regression test: -inf < -inf should be false (IEEE 754). + The bug was VOPC 16-bit not handling v128+ encoding for vsrc1. + """ + instructions = [ + # v0: both halves = -inf (0xFC00) + s_mov_b32(s[0], 0xFC00FC00), + v_mov_b32_e32(v[0], s[0]), + # v_cmp_nge_f16 is "not greater or equal" which is equivalent to "unordered less than" + # -inf nge -inf should be false (since -inf >= -inf is true) + v_cmp_nge_f16_e32(v[0], v[0].h), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 0, "Expected vcc=0 (-inf >= -inf)") + + def test_v_cmp_f16_multilane(self): + """v_cmp_lt_f16 with vsrc1=v128 across multiple lanes.""" + instructions = [ + # Lane 0: v0 = 0x40003c00 (hi=2.0, lo=1.0) -> 1.0 < 2.0 = true + # Lane 1: v0 = 0x3c004000 (hi=1.0, lo=2.0) -> 2.0 < 1.0 = false + v_mov_b32_e32(v[0], 0x40003c00), # default + # Use v_cmp to select lane 1 (v255 = lane_id from prologue) + v_cmp_eq_u32_e32(1, v[255]), # vcc = (lane == 1) + v_cndmask_b32_e64(v[0], v[0], 0x3c004000, SrcEnum.VCC_LO), + v_cmp_lt_f16_e32(v[0], v[0].h), + ] + st = run_program(instructions, n_lanes=2) + self.assertEqual(st.vcc & 1, 1, "Lane 0: expected vcc=1 (1.0 < 2.0)") + self.assertEqual((st.vcc >> 1) & 1, 0, "Lane 1: expected vcc=0 (2.0 < 1.0)") + + +class TestF16SinKernelOps(unittest.TestCase): + """Tests for F16 instructions used in the sin kernel. 
Run with USE_HW=1 to compare emulator vs hardware.""" + + def test_v_cvt_i16_f16_zero(self): + """v_cvt_i16_f16: Convert f16 0.0 to i16 0.""" + instructions = [ + s_mov_b32(s[0], 0x00000000), # f16 0.0 in low bits + v_mov_b32_e32(v[0], s[0]), + v_cvt_i16_f16_e32(v[1], v[0]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] & 0xFFFF + self.assertEqual(result, 0, f"Expected 0, got {result}") + + def test_v_cvt_i16_f16_one(self): + """v_cvt_i16_f16: Convert f16 1.0 (0x3c00) to i16 1.""" + instructions = [ + s_mov_b32(s[0], 0x00003c00), # f16 1.0 in low bits + v_mov_b32_e32(v[0], s[0]), + v_cvt_i16_f16_e32(v[1], v[0]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] & 0xFFFF + self.assertEqual(result, 1, f"Expected 1, got {result}") + + def test_v_cvt_i16_f16_negative(self): + """v_cvt_i16_f16: Convert f16 -2.0 (0xc000) to i16 -2.""" + instructions = [ + s_mov_b32(s[0], 0x0000c000), # f16 -2.0 in low bits + v_mov_b32_e32(v[0], s[0]), + v_cvt_i16_f16_e32(v[1], v[0]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] & 0xFFFF + # -2 as signed 16-bit = 0xFFFE + self.assertEqual(result, 0xFFFE, f"Expected 0xFFFE (-2), got 0x{result:04x}") + + def test_v_cvt_i16_f16_from_hi(self): + """v_cvt_i16_f16: Convert f16 from high half of register.""" + instructions = [ + s_mov_b32(s[0], 0x3c000000), # f16 1.0 in HIGH bits, 0.0 in low + v_mov_b32_e32(v[0], s[0]), + v_cvt_i16_f16_e32(v[1], v[0].h), # Read from high half + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] & 0xFFFF + self.assertEqual(result, 1, f"Expected 1, got {result}") + + def test_v_bfe_i32_sign_extend(self): + """v_bfe_i32: Extract 16 bits with sign extension.""" + instructions = [ + s_mov_b32(s[0], 0x80000001), # low 16 bits = 0x0001 + v_mov_b32_e32(v[0], s[0]), + v_bfe_i32(v[1], v[0], 0, 16), # Extract bits 0-15 with sign extend + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] + self.assertEqual(result, 1, f"Expected 1, got {result}") + + def test_v_bfe_i32_sign_extend_negative(self): + """v_bfe_i32: Extract 16 bits with sign extension (negative value).""" + instructions = [ + s_mov_b32(s[0], 0x0000FFFE), # low 16 bits = 0xFFFE = -2 as i16 + v_mov_b32_e32(v[0], s[0]), + v_bfe_i32(v[1], v[0], 0, 16), # Extract bits 0-15 with sign extend + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] + # -2 sign-extended to 32 bits = 0xFFFFFFFE + self.assertEqual(result, 0xFFFFFFFE, f"Expected 0xFFFFFFFE (-2), got 0x{result:08x}") + + def test_v_cndmask_b16_select_src0(self): + """v_cndmask_b16: Select src0 when vcc=0.""" + instructions = [ + s_mov_b32(s[0], 0x3c003800), # src0.h=1.0, src0.l=0.5 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x4000c000), # src1.h=2.0, src1.l=-2.0 + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[SrcEnum.VCC_LO - 128], 0), # vcc = 0 + v_cndmask_b16(v[2], v[0], v[1], SrcEnum.VCC_LO), # Should select v0.l = 0.5 + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result, 0x3800, f"Expected 0x3800 (0.5), got 0x{result:04x}") + + def test_v_cndmask_b16_select_src1(self): + """v_cndmask_b16: Select src1 when vcc=1.""" + instructions = [ + s_mov_b32(s[0], 0x3c003800), # src0.h=1.0, src0.l=0.5 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x4000c000), # src1.h=2.0, src1.l=-2.0 + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[SrcEnum.VCC_LO - 128], 1), # vcc = 1 for lane 0 + v_cndmask_b16(v[2], v[0], v[1], SrcEnum.VCC_LO), # Should select v1.l = -2.0 + ] 
+ st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result, 0xc000, f"Expected 0xc000 (-2.0), got 0x{result:04x}") + + def test_v_cndmask_b16_write_hi(self): + """v_cndmask_b16: Write to high half with opsel.""" + instructions = [ + s_mov_b32(s[0], 0x3c003800), # src0: hi=1.0, lo=0.5 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x4000c000), # src1: hi=2.0, lo=-2.0 + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], 0xDEAD0000), # v2 initial: hi=0xDEAD, lo=0 + v_mov_b32_e32(v[2], s[2]), + s_mov_b32(s[SrcEnum.VCC_LO - 128], 0), # vcc = 0 + # opsel=8 means write to high half (bit 3 = dst hi) + # opsel=1 means read src0 from hi, opsel=2 means read src1 from hi + # v_cndmask_b16 v2.h, v0.h, v1.h, vcc -> select v0.h = 1.0 + VOP3(VOP3Op.V_CNDMASK_B16, vdst=v[2], src0=v[0], src1=v[1], src2=SrcEnum.VCC_LO, opsel=0b1011), + ] + st = run_program(instructions, n_lanes=1) + result_hi = (st.vgpr[0][2] >> 16) & 0xFFFF + result_lo = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result_hi, 0x3c00, f"Expected hi=0x3c00 (1.0), got 0x{result_hi:04x}") + self.assertEqual(result_lo, 0x0000, f"Expected lo preserved as 0, got 0x{result_lo:04x}") + + def test_v_mul_f16_basic(self): + """v_mul_f16: 2.0 * 3.0 = 6.0.""" + instructions = [ + s_mov_b32(s[0], 0x00004000), # f16 2.0 in low bits + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x00004200), # f16 3.0 in low bits + v_mov_b32_e32(v[1], s[1]), + v_mul_f16_e32(v[2], v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result, 0x4600, f"Expected 0x4600 (6.0), got 0x{result:04x}") + + def test_v_mul_f16_by_zero(self): + """v_mul_f16: x * 0.0 = 0.0.""" + instructions = [ + s_mov_b32(s[0], 0x00003c00), # f16 1.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x00000000), # f16 0.0 + v_mov_b32_e32(v[1], s[1]), + v_mul_f16_e32(v[2], v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result, 0x0000, f"Expected 0x0000 (0.0), got 0x{result:04x}") + + def test_v_mul_f16_hi_half(self): + """v_mul_f16: Multiply using high halves.""" + instructions = [ + s_mov_b32(s[0], 0x40000000), # hi=2.0, lo=0.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x42000000), # hi=3.0, lo=0.0 + v_mov_b32_e32(v[1], s[1]), + v_mul_f16_e32(v[2].h, v[0].h, v[1].h), # 2.0 * 3.0 = 6.0 in hi + ] + st = run_program(instructions, n_lanes=1) + result_hi = (st.vgpr[0][2] >> 16) & 0xFFFF + self.assertEqual(result_hi, 0x4600, f"Expected hi=0x4600 (6.0), got 0x{result_hi:04x}") + + def test_v_fmac_f16_basic(self): + """v_fmac_f16: dst = src0 * src1 + dst = 2.0 * 3.0 + 1.0 = 7.0.""" + instructions = [ + s_mov_b32(s[0], 0x00004000), # f16 2.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x00004200), # f16 3.0 + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], 0x00003c00), # f16 1.0 (accumulator) + v_mov_b32_e32(v[2], s[2]), + v_fmac_f16_e32(v[2], v[0], v[1]), # v2 = v0 * v1 + v2 + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result, 0x4700, f"Expected 0x4700 (7.0), got 0x{result:04x}") + + def test_v_fmac_f16_hi_dest(self): + """v_fmac_f16 with .h destination: dst.h = src0 * src1 + dst.h. + + This tests the case from AMD_LLVM sin(0) where V_FMAC_F16 writes to v0.h. + The accumulator D should be read from v0.h, not v0.l. 
+ """ + from extra.assembly.amd.pcode import f32_to_f16, _f16 + # Set up: v0 = {hi=0.5, lo=1.0}, src0 = 0.0 (literal), src1 = v1.l (any value) + # Expected: v0.h = 0.0 * v1.l + 0.5 = 0.5 (unchanged) + instructions = [ + s_mov_b32(s[0], 0x38003c00), # v0 = {hi=0.5, lo=1.0} + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x38000000), # v1 = {hi=0.5, lo=0.0} + v_mov_b32_e32(v[1], s[1]), + # v_fmac_f16 v0.h, literal(0.318...), v1.l (vdst=128 for .h) + # D = D + S0 * S1 = v0.h + 0.318 * 0.0 = 0.5 + 0 = 0.5 + VOP2(VOP2Op.V_FMAC_F16, vdst=RawImm(128), src0=RawImm(255), vsrc1=RawImm(1), literal=0x3518), # 0.318... * 0.0 + 0.5 + ] + st = run_program(instructions, n_lanes=1) + v0 = st.vgpr[0][0] + result_hi = _f16((v0 >> 16) & 0xffff) + result_lo = _f16(v0 & 0xffff) + self.assertAlmostEqual(result_hi, 0.5, delta=0.01, msg=f"Expected v0.h=0.5, got {result_hi}") + self.assertAlmostEqual(result_lo, 1.0, delta=0.01, msg=f"Expected v0.l=1.0, got {result_lo}") + + def test_v_add_f16_basic(self): + """v_add_f16: 1.0 + 2.0 = 3.0.""" + instructions = [ + s_mov_b32(s[0], 0x00003c00), # f16 1.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x00004000), # f16 2.0 + v_mov_b32_e32(v[1], s[1]), + v_add_f16_e32(v[2], v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result, 0x4200, f"Expected 0x4200 (3.0), got 0x{result:04x}") + + def test_v_add_f16_negative(self): + """v_add_f16: 1.0 + (-1.5703125) = -0.5703125.""" + # 0xbe48 is approximately -1.5703125 in f16 + instructions = [ + s_mov_b32(s[0], 0x00003c00), # f16 1.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x0000be48), # f16 -1.5703125 + v_mov_b32_e32(v[1], s[1]), + v_add_f16_e32(v[2], v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + # 1.0 + (-1.5703125) = -0.5703125 which is approximately 0xb890 + # Allow some tolerance - just check it's negative and close + from extra.assembly.amd.pcode import _f16 + result_f = _f16(result) + expected = 1.0 - 1.5703125 + self.assertAlmostEqual(result_f, expected, places=2, msg=f"Expected ~{expected}, got {result_f}") + + def test_v_fmaak_f16_basic(self): + """v_fmaak_f16: dst = src0 * vsrc1 + K.""" + # v_fmaak_f16 computes: D = S0 * S1 + K + # 2.0 * 3.0 + 1.0 = 7.0 + instructions = [ + s_mov_b32(s[0], 0x00004000), # f16 2.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x00004200), # f16 3.0 + v_mov_b32_e32(v[1], s[1]), + v_fmaak_f16_e32(v[2], v[0], v[1], 0x3c00), # v2 = v0 * v1 + 1.0 + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] & 0xFFFF + self.assertEqual(result, 0x4700, f"Expected 0x4700 (7.0), got 0x{result:04x}") + + def test_v_fmamk_f32_basic(self): + """v_fmamk_f32: dst = src0 * K + vsrc1.""" + # v_fmamk_f32 computes: D = S0 * K + S1 + # 2.0 * 3.0 + 1.0 = 7.0 + instructions = [ + s_mov_b32(s[0], f2i(2.0)), + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], f2i(1.0)), # accumulator + v_mov_b32_e32(v[1], s[1]), + v_fmamk_f32_e32(v[2], v[0], f2i(3.0), v[1]), # v2 = v0 * 3.0 + v1 + ] + st = run_program(instructions, n_lanes=1) + result = i2f(st.vgpr[0][2]) + self.assertAlmostEqual(result, 7.0, places=5, msg=f"Expected 7.0, got {result}") + + def test_v_fmamk_f32_small_constant(self): + """v_fmamk_f32: Test with small constant like in sin kernel.""" + # This mimics part of the sin kernel: 1.0 * (-1.13e-4) + (-3.1414795) ≈ -3.1415926 + k_val = 0xb8ed5000 # approximately -0.0001131594 as f32 + s1_val = f2i(-3.1414794921875) + instructions = [ + s_mov_b32(s[0], f2i(1.0)), + 
v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], s1_val), + v_mov_b32_e32(v[1], s[1]), + v_fmamk_f32_e32(v[2], v[0], k_val, v[1]), # v2 = 1.0 * K + v1 + ] + st = run_program(instructions, n_lanes=1) + result = i2f(st.vgpr[0][2]) + k_f32 = i2f(k_val) + expected = 1.0 * k_f32 + (-3.1414794921875) + self.assertAlmostEqual(result, expected, places=5, msg=f"Expected {expected}, got {result}") + + def test_v_mov_b16_to_hi(self): + """v_mov_b16: Move immediate to high half, preserving low.""" + instructions = [ + s_mov_b32(s[0], 0x0000DEAD), # initial: lo=0xDEAD, hi=0 + v_mov_b32_e32(v[0], s[0]), + v_mov_b16_e32(v[0].h, 0x3800), # Move 0.5 to high half + ] + st = run_program(instructions, n_lanes=1) + result_hi = (st.vgpr[0][0] >> 16) & 0xFFFF + result_lo = st.vgpr[0][0] & 0xFFFF + self.assertEqual(result_hi, 0x3800, f"Expected hi=0x3800, got 0x{result_hi:04x}") + self.assertEqual(result_lo, 0xDEAD, f"Expected lo=0xDEAD (preserved), got 0x{result_lo:04x}") + + def test_v_mov_b16_to_lo(self): + """v_mov_b16: Move immediate to low half, preserving high.""" + instructions = [ + s_mov_b32(s[0], 0xBEEF0000), # initial: hi=0xBEEF, lo=0 + v_mov_b32_e32(v[0], s[0]), + v_mov_b16_e32(v[0], 0x3c00), # Move 1.0 to low half + ] + st = run_program(instructions, n_lanes=1) + result_hi = (st.vgpr[0][0] >> 16) & 0xFFFF + result_lo = st.vgpr[0][0] & 0xFFFF + self.assertEqual(result_lo, 0x3c00, f"Expected lo=0x3c00, got 0x{result_lo:04x}") + self.assertEqual(result_hi, 0xBEEF, f"Expected hi=0xBEEF (preserved), got 0x{result_hi:04x}") + + def test_v_xor_b32_sign_flip(self): + """v_xor_b32: XOR with 0x8000 flips sign of f16 in low bits.""" + # 0x4246 is approximately 3.13671875 in f16 + # XOR with 0x8000 gives 0xC246 which is -3.13671875 + instructions = [ + s_mov_b32(s[0], 0x00004246), # f16 3.13671875 + v_mov_b32_e32(v[0], s[0]), + v_xor_b32_e32(v[1], 0x8000, v[0]), # Flip sign bit of low half + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][1] & 0xFFFF + self.assertEqual(result, 0xC246, f"Expected 0xC246 (-3.137), got 0x{result:04x}") + + def test_v_fma_mix_f32_all_f32_sources(self): + """v_fma_mix_f32: All sources as f32 (opsel_hi=0).""" + instructions = [ + s_mov_b32(s[0], f2i(2.0)), + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], f2i(3.0)), + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], f2i(1.0)), + v_mov_b32_e32(v[2], s[2]), + # opsel_hi=0,0,0 means all sources are f32 + VOP3P(VOP3POp.V_FMA_MIX_F32, vdst=v[3], src0=v[0], src1=v[1], src2=v[2], opsel=0, opsel_hi=0, opsel_hi2=0), + ] + st = run_program(instructions, n_lanes=1) + result = i2f(st.vgpr[0][3]) + self.assertAlmostEqual(result, 7.0, places=5, msg=f"2*3+1=7, got {result}") + + def test_v_fma_mixlo_f16_all_f32_sources(self): + """v_fma_mixlo_f16: All sources as f32, result to low f16.""" + instructions = [ + s_mov_b32(s[0], f2i(1.0)), + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], f2i(-1.22e-10)), # Very small + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], f2i(-3.1415927)), # -pi + v_mov_b32_e32(v[2], s[2]), + s_mov_b32(s[3], 0xDEAD0000), # Garbage in hi + v_mov_b32_e32(v[3], s[3]), + # 1.0 * (-1.22e-10) + (-3.1415927) ≈ -3.1415927 + VOP3P(VOP3POp.V_FMA_MIXLO_F16, vdst=v[3], src0=v[0], src1=v[1], src2=v[2], opsel=0, opsel_hi=0, opsel_hi2=0), + ] + st = run_program(instructions, n_lanes=1) + from extra.assembly.amd.pcode import _f16 + result_lo = _f16(st.vgpr[0][3] & 0xFFFF) + result_hi = (st.vgpr[0][3] >> 16) & 0xFFFF + # Result should be approximately -pi + self.assertAlmostEqual(result_lo, -3.14, delta=0.01, msg=f"Expected ~-3.14, got 
{result_lo}") + self.assertEqual(result_hi, 0xDEAD, f"Expected hi preserved as 0xDEAD, got 0x{result_hi:04x}") + + +class TestVCmpClassF16(unittest.TestCase): + """Tests for V_CMP_CLASS_F16 - critical for f16 sin/cos classification. + + Class bit mapping: + bit 0 = signaling NaN + bit 1 = quiet NaN + bit 2 = -infinity + bit 3 = -normal + bit 4 = -denormal + bit 5 = -zero + bit 6 = +zero + bit 7 = +denormal + bit 8 = +normal + bit 9 = +infinity + + This is crucial for the f16 sin kernel which uses v_cmp_class_f16 to detect + special values like +-0, +-inf, NaN and select appropriate outputs. + """ + + def test_cmp_class_f16_positive_zero(self): + """V_CMP_CLASS_F16: +zero should match bit 6.""" + # f16 +0.0 = 0x0000 + instructions = [ + v_mov_b32_e32(v[0], 0), # f16 +0.0 in low 16 bits + v_mov_b32_e32(v[1], 0x40), # bit 6 only (+zero) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with mask 0x40") + + def test_cmp_class_f16_negative_zero(self): + """V_CMP_CLASS_F16: -zero should match bit 5.""" + # f16 -0.0 = 0x8000 + instructions = [ + s_mov_b32(s[0], 0x8000), # f16 -0.0 + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], 0x20), # bit 5 only (-zero) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for -zero with mask 0x20") + + def test_cmp_class_f16_positive_normal(self): + """V_CMP_CLASS_F16: +1.0 (normal) should match bit 8.""" + # f16 1.0 = 0x3c00 + instructions = [ + s_mov_b32(s[0], 0x3c00), # f16 +1.0 + s_mov_b32(s[1], 0x100), # bit 8 (+normal) + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], s[1]), + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +1.0 with mask 0x100 (+normal)") + + def test_cmp_class_f16_negative_normal(self): + """V_CMP_CLASS_F16: -1.0 (normal) should match bit 3.""" + # f16 -1.0 = 0xbc00 + instructions = [ + s_mov_b32(s[0], 0xbc00), # f16 -1.0 + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], 0x08), # bit 3 (-normal) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for -1.0 with mask 0x08 (-normal)") + + def test_cmp_class_f16_positive_infinity(self): + """V_CMP_CLASS_F16: +inf should match bit 9.""" + # f16 +inf = 0x7c00 + instructions = [ + s_mov_b32(s[0], 0x7c00), # f16 +inf + s_mov_b32(s[1], 0x200), # bit 9 (+inf) + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], s[1]), + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +inf with mask 0x200") + + def test_cmp_class_f16_negative_infinity(self): + """V_CMP_CLASS_F16: -inf should match bit 2.""" + # f16 -inf = 0xfc00 + instructions = [ + s_mov_b32(s[0], 0xfc00), # f16 -inf + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], 0x04), # bit 2 (-inf) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for -inf with mask 0x04") + + def test_cmp_class_f16_quiet_nan(self): + """V_CMP_CLASS_F16: quiet NaN should match bit 1.""" + # f16 quiet NaN = 0x7e00 (exponent all 1s, mantissa MSB set) + instructions = [ + s_mov_b32(s[0], 0x7e00), # f16 quiet NaN + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], 0x02), # bit 1 (quiet NaN) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, 
n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for quiet NaN with mask 0x02") + + def test_cmp_class_f16_signaling_nan(self): + """V_CMP_CLASS_F16: signaling NaN should match bit 0.""" + # f16 signaling NaN = 0x7c01 (exponent all 1s, mantissa MSB clear, other mantissa bits set) + instructions = [ + s_mov_b32(s[0], 0x7c01), # f16 signaling NaN + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], 0x01), # bit 0 (signaling NaN) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for signaling NaN with mask 0x01") + + def test_cmp_class_f16_positive_denormal(self): + """V_CMP_CLASS_F16: positive denormal should match bit 7.""" + # f16 smallest positive denormal = 0x0001 + instructions = [ + v_mov_b32_e32(v[0], 1), # f16 +denormal (0x0001) + v_mov_b32_e32(v[1], 0x80), # bit 7 (+denormal) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +denormal with mask 0x80") + + def test_cmp_class_f16_negative_denormal(self): + """V_CMP_CLASS_F16: negative denormal should match bit 4.""" + # f16 smallest negative denormal = 0x8001 + instructions = [ + s_mov_b32(s[0], 0x8001), # f16 -denormal + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], 0x10), # bit 4 (-denormal) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for -denormal with mask 0x10") + + def test_cmp_class_f16_combined_mask_zeros(self): + """V_CMP_CLASS_F16: mask 0x60 covers both +zero and -zero.""" + # Test with +0.0 + instructions = [ + v_mov_b32_e32(v[0], 0), # f16 +0.0 + v_mov_b32_e32(v[1], 0x60), # bits 5 and 6 (+-zero) + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with mask 0x60") + + def test_cmp_class_f16_combined_mask_1f8(self): + """V_CMP_CLASS_F16: mask 0x1f8 covers -normal,-denorm,-zero,+zero,+denorm,+normal. + + This is the exact mask used in the f16 sin kernel at PC=46: + v_cmp_class_f16_e64 vcc_lo, v1, 0x1f8 + + The kernel uses this to detect if the input is a "normal" finite value + (not NaN, not infinity). If the check fails (vcc=0), it selects NaN output. + """ + # Test with +0.0 - should match via bit 6 + instructions = [ + v_mov_b32_e32(v[0], 0), # f16 +0.0 + s_mov_b32(s[0], 0x1f8), + v_mov_b32_e32(v[1], s[0]), # mask 0x1f8 + v_cmp_class_f16_e32(v[0], v[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with mask 0x1f8") + + def test_cmp_class_f16_vop3_encoding(self): + """V_CMP_CLASS_F16 in VOP3 encoding (v_cmp_class_f16_e64). + + This tests the exact instruction encoding used in the f16 sin kernel. + VOP3 encoding allows the result to go to any SGPR pair, not just VCC. 
+ """ + # v_cmp_class_f16_e64 vcc_lo, v0, 0x1f8 + # Use SGPR to hold the mask since literals require special handling + instructions = [ + v_mov_b32_e32(v[0], 0), # f16 +0.0 + s_mov_b32(s[0], 0x1f8), # class mask + VOP3(VOP3Op.V_CMP_CLASS_F16, vdst=RawImm(VCC), src0=v[0], src1=s[0]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +zero with VOP3 encoding") + + def test_cmp_class_f16_vop3_normal_positive(self): + """V_CMP_CLASS_F16 VOP3 encoding with +1.0 (normal).""" + # f16 1.0 = 0x3c00, should match bit 8 (+normal) in mask 0x1f8 + instructions = [ + s_mov_b32(s[0], 0x3c00), # f16 +1.0 + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x1f8), # class mask + VOP3(VOP3Op.V_CMP_CLASS_F16, vdst=RawImm(VCC), src0=v[0], src1=s[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 1, "VCC should be 1 for +1.0 (normal) with mask 0x1f8") + + def test_cmp_class_f16_vop3_nan_fails_mask(self): + """V_CMP_CLASS_F16 VOP3: NaN should NOT match mask 0x1f8 (no NaN bits set).""" + # f16 quiet NaN = 0x7e00, should NOT match mask 0x1f8 (bits 3-8 only) + instructions = [ + s_mov_b32(s[0], 0x7e00), # f16 quiet NaN + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x1f8), # class mask + VOP3(VOP3Op.V_CMP_CLASS_F16, vdst=RawImm(VCC), src0=v[0], src1=s[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 0, "VCC should be 0 for NaN with mask 0x1f8 (no NaN bits)") + + def test_cmp_class_f16_vop3_inf_fails_mask(self): + """V_CMP_CLASS_F16 VOP3: +inf should NOT match mask 0x1f8 (no inf bits set).""" + # f16 +inf = 0x7c00, should NOT match mask 0x1f8 (bits 3-8 only) + instructions = [ + s_mov_b32(s[0], 0x7c00), # f16 +inf + v_mov_b32_e32(v[0], s[0]), + s_mov_b32(s[1], 0x1f8), # class mask + VOP3(VOP3Op.V_CMP_CLASS_F16, vdst=RawImm(VCC), src0=v[0], src1=s[1]), + ] + st = run_program(instructions, n_lanes=1) + self.assertEqual(st.vcc & 1, 0, "VCC should be 0 for +inf with mask 0x1f8 (no inf bits)") + + +class TestVOP3F16Modifiers(unittest.TestCase): + """Tests for VOP3 16-bit ops with abs/neg modifiers and inline constants. + + VOP3 16-bit ops must: + 1. Use f16 inline constants (not f32) + 2. Apply abs/neg modifiers as f16 operations (toggle bit 15) + + This is critical for sin/cos kernels that use v_cvt_f32_f16 with |abs| + and v_fma_f16 with inline constants. 
+ """ + + def test_v_cvt_f32_f16_abs_negative(self): + """V_CVT_F32_F16 with |abs| on negative value.""" + from extra.assembly.amd.pcode import f32_to_f16 + f16_neg1 = f32_to_f16(-1.0) # 0xbc00 + instructions = [ + s_mov_b32(s[0], f16_neg1), + v_mov_b32_e32(v[1], s[0]), + v_cvt_f32_f16_e64(v[0], abs(v[1])), # |(-1.0)| = 1.0 + ] + st = run_program(instructions, n_lanes=1) + result = i2f(st.vgpr[0][0]) + self.assertAlmostEqual(result, 1.0, places=5, msg=f"Expected 1.0, got {result}") + + def test_v_cvt_f32_f16_abs_positive(self): + """V_CVT_F32_F16 with |abs| on positive value (should stay positive).""" + from extra.assembly.amd.pcode import f32_to_f16 + f16_2 = f32_to_f16(2.0) # 0x4000 + instructions = [ + s_mov_b32(s[0], f16_2), + v_mov_b32_e32(v[1], s[0]), + v_cvt_f32_f16_e64(v[0], abs(v[1])), # |2.0| = 2.0 + ] + st = run_program(instructions, n_lanes=1) + result = i2f(st.vgpr[0][0]) + self.assertAlmostEqual(result, 2.0, places=5, msg=f"Expected 2.0, got {result}") + + def test_v_cvt_f32_f16_neg_positive(self): + """V_CVT_F32_F16 with neg on positive value.""" + from extra.assembly.amd.pcode import f32_to_f16 + f16_2 = f32_to_f16(2.0) # 0x4000 + instructions = [ + s_mov_b32(s[0], f16_2), + v_mov_b32_e32(v[1], s[0]), + v_cvt_f32_f16_e64(v[0], -v[1]), # -(2.0) = -2.0 + ] + st = run_program(instructions, n_lanes=1) + result = i2f(st.vgpr[0][0]) + self.assertAlmostEqual(result, -2.0, places=5, msg=f"Expected -2.0, got {result}") + + def test_v_cvt_f32_f16_neg_negative(self): + """V_CVT_F32_F16 with neg on negative value (double negative).""" + from extra.assembly.amd.pcode import f32_to_f16 + f16_neg2 = f32_to_f16(-2.0) # 0xc000 + instructions = [ + s_mov_b32(s[0], f16_neg2), + v_mov_b32_e32(v[1], s[0]), + v_cvt_f32_f16_e64(v[0], -v[1]), # -(-2.0) = 2.0 + ] + st = run_program(instructions, n_lanes=1) + result = i2f(st.vgpr[0][0]) + self.assertAlmostEqual(result, 2.0, places=5, msg=f"Expected 2.0, got {result}") + + def test_v_fma_f16_inline_const_1_0(self): + """V_FMA_F16: a*b + 1.0 should use f16 inline constant.""" + from extra.assembly.amd.pcode import f32_to_f16, _f16 + # v4 = 0.3259 (f16), v6 = -0.4866 (f16), src2 = 1.0 inline + # Result: 0.3259 * (-0.4866) + 1.0 = 0.8413... 
+ f16_a = f32_to_f16(0.325928) # 0x3537
+ f16_b = f32_to_f16(-0.486572) # 0xb7c9
+ instructions = [
+ s_mov_b32(s[0], f16_a),
+ v_mov_b32_e32(v[4], s[0]),
+ s_mov_b32(s[1], f16_b),
+ v_mov_b32_e32(v[6], s[1]),
+ v_fma_f16(v[4], v[4], v[6], 1.0), # 1.0 is inline constant
+ ]
+ st = run_program(instructions, n_lanes=1)
+ result = _f16(st.vgpr[0][4] & 0xffff)
+ expected = 0.325928 * (-0.486572) + 1.0
+ self.assertAlmostEqual(result, expected, delta=0.01, msg=f"Expected ~{expected:.4f}, got {result}")
+
+ def test_v_fma_f16_inline_const_0_5(self):
+ """V_FMA_F16: a*b + 0.5 should use f16 inline constant."""
+ from extra.assembly.amd.pcode import f32_to_f16, _f16
+ f16_a = f32_to_f16(2.0)
+ f16_b = f32_to_f16(3.0)
+ instructions = [
+ s_mov_b32(s[0], f16_a),
+ v_mov_b32_e32(v[0], s[0]),
+ s_mov_b32(s[1], f16_b),
+ v_mov_b32_e32(v[1], s[1]),
+ v_fma_f16(v[2], v[0], v[1], 0.5), # 0.5 is inline constant
+ ]
+ st = run_program(instructions, n_lanes=1)
+ result = _f16(st.vgpr[0][2] & 0xffff)
+ expected = 2.0 * 3.0 + 0.5
+ self.assertAlmostEqual(result, expected, delta=0.01, msg=f"Expected {expected}, got {result}")
+
+ def test_v_fma_f16_inline_const_neg_1_0(self):
+ """V_FMA_F16: a*b + (-1.0) should use f16 inline constant."""
+ from extra.assembly.amd.pcode import f32_to_f16, _f16
+ f16_a = f32_to_f16(2.0)
+ f16_b = f32_to_f16(3.0)
+ instructions = [
+ s_mov_b32(s[0], f16_a),
+ v_mov_b32_e32(v[0], s[0]),
+ s_mov_b32(s[1], f16_b),
+ v_mov_b32_e32(v[1], s[1]),
+ v_fma_f16(v[2], v[0], v[1], -1.0), # -1.0 is inline constant
+ ]
+ st = run_program(instructions, n_lanes=1)
+ result = _f16(st.vgpr[0][2] & 0xffff)
+ expected = 2.0 * 3.0 + (-1.0)
+ self.assertAlmostEqual(result, expected, delta=0.01, msg=f"Expected {expected}, got {result}")
+
+ def test_v_add_f16_abs_both(self):
+ """V_ADD_F16 with abs on both operands."""
+ from extra.assembly.amd.pcode import f32_to_f16, _f16
+ f16_neg2 = f32_to_f16(-2.0)
+ f16_neg3 = f32_to_f16(-3.0)
+ instructions = [
+ s_mov_b32(s[0], f16_neg2),
+ v_mov_b32_e32(v[0], s[0]),
+ s_mov_b32(s[1], f16_neg3),
+ v_mov_b32_e32(v[1], s[1]),
+ v_add_f16_e64(v[2], abs(v[0]), abs(v[1])), # |-2| + |-3| = 5
+ ]
+ st = run_program(instructions, n_lanes=1)
+ result = _f16(st.vgpr[0][2] & 0xffff)
+ self.assertAlmostEqual(result, 5.0, delta=0.01, msg=f"Expected 5.0, got {result}")
+
+ def test_v_mul_f16_neg_abs(self):
+ """V_MUL_F16 with neg on one operand and abs on another."""
+ from extra.assembly.amd.pcode import f32_to_f16, _f16
+ f16_2 = f32_to_f16(2.0)
+ f16_neg3 = f32_to_f16(-3.0)
+ instructions = [
+ s_mov_b32(s[0], f16_2),
+ v_mov_b32_e32(v[0], s[0]),
+ s_mov_b32(s[1], f16_neg3),
+ v_mov_b32_e32(v[1], s[1]),
+ v_mul_f16_e64(v[2], -v[0], abs(v[1])), # -(2) * |-3| = -6
+ ]
+ st = run_program(instructions, n_lanes=1)
+ result = _f16(st.vgpr[0][2] & 0xffff)
+ self.assertAlmostEqual(result, -6.0, delta=0.01, msg=f"Expected -6.0, got {result}")
+
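The abs/neg cases above come down to sign-bit arithmetic: per the class docstring, VOP3 f16 source modifiers only touch bit 15 of the half-precision encoding. A minimal standalone sketch of the expected modifier behavior (the helper names are illustrative, not emulator API):

  def f16_abs(bits: int) -> int:
    return bits & 0x7fff  # clear sign bit 15: 0xbc00 (-1.0) -> 0x3c00 (+1.0)

  def f16_neg(bits: int) -> int:
    return bits ^ 0x8000  # toggle sign bit 15: 0x4000 (2.0) -> 0xc000 (-2.0)

  assert f16_abs(0xbc00) == 0x3c00 and f16_abs(0x3c00) == 0x3c00
  assert f16_neg(f16_neg(0x4000)) == 0x4000  # double negation round-trips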
+
+class TestVFmaMixSinCase(unittest.TestCase):
+ """Tests for the specific V_FMA_MIXLO_F16 case that fails in AMD_LLVM sin(0) kernel."""
+
+ def test_v_fma_mixlo_f16_sin_case(self):
+ """V_FMA_MIXLO_F16 case from sin kernel at pc=0x14e.
+
+ This tests the specific operands that produce the wrong result:
+ - src0 = v3 = 0x3f800000 (f32 1.0)
+ - src1 = s6 = 0xaf05a309 (f32 tiny negative)
+ - src2 = v5 = 0xc0490fdb (f32 -π)
+ - Result should be approximately -π (tiny * 1.0 + -π ≈ -π)
+ """
+ from extra.assembly.amd.pcode import _f16
+ instructions = [
+ # Set up operands as in the sin kernel
+ s_mov_b32(s[0], 0x3f800000), # f32 1.0
+ v_mov_b32_e32(v[3], s[0]),
+ s_mov_b32(s[1], 0xaf05a309), # f32 tiny negative
+ s_mov_b32(s[6], s[1]),
+ s_mov_b32(s[2], 0xc0490fdb), # f32 -π
+ v_mov_b32_e32(v[5], s[2]),
+ # Pre-fill v3 with expected hi bits
+ s_mov_b32(s[3], 0x3f800000), # hi = f32 1.0 encoding (will be overwritten by opsel behavior)
+ v_mov_b32_e32(v[3], s[3]),
+ # V_FMA_MIXLO_F16: src0=v3 (259), src1=s6, src2=v5 (261), opsel=0, opsel_hi=0, opsel_hi2=0
+ VOP3P(VOP3POp.V_FMA_MIXLO_F16, vdst=v[3], src0=v[3], src1=s[6], src2=v[5], opsel=0, opsel_hi=0, opsel_hi2=0),
+ ]
+ st = run_program(instructions, n_lanes=1)
+ lo = _f16(st.vgpr[0][3] & 0xffff)
+ # Result should be approximately -π = -3.14...
+ # f16 -π ≈ 0xc248 = -3.140625
+ self.assertAlmostEqual(lo, -3.14159, delta=0.01, msg=f"Expected ~-π, got {lo}")
+
+
+if __name__ == '__main__': unittest.main()
diff --git a/extra/assembly/amd/test/test_roundtrip.py b/extra/assembly/amd/test/test_roundtrip.py
index 7fe12d9855..bf9b68d869 100644
--- a/extra/assembly/amd/test/test_roundtrip.py
+++ b/extra/assembly/amd/test/test_roundtrip.py
@@ -31,7 +31,12 @@ def detect_format(data: bytes) -> type[Inst] | None:
 
 # Check 64-bit formats
 if len(data) >= 8:
- if enc_8bit in (0xD4, 0xD5, 0xD7): return VOP3
+ if enc_8bit in (0xD4, 0xD5, 0xD7):
+ # VOP3 and VOP3SD share encoding - check opcode to determine which
+ # VOP3SD opcodes: 288-290 (v_*_co_ci_*), 764-770 (v_div_scale_*, v_mad_*, v_*_co_u32)
+ op = (int.from_bytes(data[:8], 'little') >> 16) & 0x3FF
+ if op in {288, 289, 290, 764, 765, 766, 767, 768, 769, 770}: return VOP3SD
+ return VOP3
 if enc_8bit == 0xD6: return VOP3SD
 if enc_8bit == 0xCC: return VOP3P
 if enc_8bit == 0xCD: return VINTERP

From 9c89be5235eabc403d25aa95aa72a64e41794682 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 09:25:40 -0500
Subject: [PATCH 3/8] assembly/amd: fix v_perm_b32 + PC fixes (#13897)

* assembly/amd: fix v_perm_b32

* add pc support
---
 extra/assembly/amd/autogen/cdna/gen_pcode.py  | 3484 ++++++++++++-----
 extra/assembly/amd/autogen/rdna3/gen_pcode.py | 2852 +++++++++-----
 extra/assembly/amd/autogen/rdna4/gen_pcode.py | 2660 ++++++++-----
 extra/assembly/amd/emu.py                     |   69 +-
 extra/assembly/amd/pcode.py                   |   45 +-
 extra/assembly/amd/test/test_emu.py           |   24 +
 6 files changed, 6134 insertions(+), 3000 deletions(-)

diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py
index 53a39ddfce..cb2f3e8f06 100644
--- a/extra/assembly/amd/autogen/cdna/gen_pcode.py
+++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py
@@ -5,7 +5,7 @@
 from extra.assembly.amd.autogen.cdna import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3POp, VOPCOp, VOP3AOp, VOP3BOp
 from extra.assembly.amd.pcode import *
 
-def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
 # D0.b32 = S0.b32
 S0 = Reg(s0)
 D0 = Reg(d0)
@@ -15,7 +15,7 @@ def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
 result = {'d0': D0._val, 'scc': scc & 1}
 return result

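The hunk above is the mechanical half of the PC support: every generated handler grows a trailing pc=0 keyword. Because the parameter is defaulted, call sites that predate the change keep binding exactly as before, and only the PC-aware ops read it. A sketch of the compatibility argument, with the handler body reduced to an illustrative stub:

  def handler(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars,
              src0_idx=0, vdst_idx=0, pc=0):
    return {'d0': s0, 'scc': scc & 1}

  # a pre-existing positional call without pc still binds every parameter:
  assert handler(7, 0, 0, 0, 1, 0, 0, 1, None, None, {}) == {'d0': 7, 'scc': 1}
  # a PC-aware caller simply adds the keyword:
  assert handler(7, 0, 0, 0, 1, 0, 0, 1, None, None, {}, pc=0x100) == {'d0': 7, 'scc': 1}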
-def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -26,7 +26,7 @@ def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b32 = S0.b32 # endif @@ -40,7 +40,7 @@ def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b64 = S0.b64 # endif @@ -55,7 +55,7 @@ def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32; # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -68,7 +68,7 @@ def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~S0.u64; # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -82,7 +82,7 @@ def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # declare i : 6'U; # for i in 6'0U : 6'31U do @@ -104,7 +104,7 @@ def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # declare i : 6'U; # for i in 6'0U : 6'63U do @@ -127,7 +127,7 @@ def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -137,7 +137,7 @@ def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[63 : 0] = S0.u64[0 : 63] S0 = Reg(s0) D0 = Reg(d0) @@ -148,7 +148,7 @@ def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'0U ? 1 : 0 @@ -169,7 +169,7 @@ def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'0U ? 1 : 0 @@ -191,7 +191,7 @@ def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'1U ? 1 : 0 @@ -212,7 +212,7 @@ def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'1U ? 
1 : 0 @@ -234,7 +234,7 @@ def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no zeros are found # for i in 0 : 31 do @@ -257,7 +257,7 @@ def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no zeros are found # for i in 0 : 63 do @@ -280,7 +280,7 @@ def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -303,7 +303,7 @@ def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -326,7 +326,7 @@ def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -349,7 +349,7 @@ def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -372,7 +372,7 @@ def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -395,7 +395,7 @@ def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 63 do @@ -418,7 +418,7 @@ def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i8)) S0 = Reg(s0) D0 = Reg(d0) @@ -428,7 +428,7 @@ def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -438,7 +438,7 @@ def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -448,7 +448,7 @@ def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -459,7 +459,7 @@ def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -469,7 +469,7 @@ def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -480,7 +480,62 @@ def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.i64 = PC + 4LL + D0 = Reg(d0) + PC = Reg(pc) + # --- compiled pseudocode --- + D0.i64 = PC + 4 + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + 
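S_GETPC_B64 writes the address of the following instruction (PC + 4), and the new_pc plumbing above reinterprets the 64-bit register value as a signed byte address before handing it back to the emulator loop. A standalone sketch of that two's-complement fold, mirroring the _pc expression in the handlers:

  def to_signed64(v: int) -> int:
    v &= 0xffffffffffffffff
    return v if v < 0x8000000000000000 else v - 0x10000000000000000

  assert to_signed64(0x1000) == 0x1000
  assert to_signed64(0xffffffffffffffff) == -1  # wraps to a negative address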
+def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # PC = S0.i64 + S0 = Reg(s0) + PC = Reg(pc) + # --- compiled pseudocode --- + PC = Reg(S0.i64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # jump_addr = S0.i64; + # D0.i64 = PC + 4LL; + # PC = jump_addr.i64 + S0 = Reg(s0) + D0 = Reg(d0) + PC = Reg(pc) + # --- compiled pseudocode --- + jump_addr = S0.i64 + D0.i64 = PC + 4 + PC = Reg(jump_addr.i64) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # PC = S0.i64 + S0 = Reg(s0) + PC = Reg(pc) + # --- compiled pseudocode --- + PC = Reg(S0.i64) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -503,7 +558,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u64; @@ -526,7 +581,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -549,7 +604,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC 
mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -572,7 +627,7 @@ def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['d0_64'] = True return result -def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -595,7 +650,7 @@ def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -618,7 +673,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -641,7 +696,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -664,7 +719,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # for i in 0 : 7 do # tmp[i] = S0.u32[i * 4 +: 4] != 0U @@ -685,7 +740,7 @@ def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): # tmp = 0ULL; # for i in 0 : 15 do # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL @@ -707,7 +762,7 @@ def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32; # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -720,7 +775,7 @@ def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # M0[7 : 0] = S0.u32[7 : 0].b8 S0 = Reg(s0) # --- compiled pseudocode --- @@ -729,7 +784,7 @@ def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': d0, 'scc': scc & 1} return result -def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -752,7 +807,7 @@ def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['d0_64'] = True return result -def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -775,7 +830,7 @@ def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is @@ -796,7 +851,7 @@ def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -817,7 +872,7 @@ def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32; # for i in 0 : 31 do # D0.u64[i * 2] = tmp[i]; @@ -865,6 +920,10 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_BITSET0_B64: _SOP1Op_S_BITSET0_B64, SOP1Op.S_BITSET1_B32: _SOP1Op_S_BITSET1_B32, SOP1Op.S_BITSET1_B64: _SOP1Op_S_BITSET1_B64, + SOP1Op.S_GETPC_B64: _SOP1Op_S_GETPC_B64, + SOP1Op.S_SETPC_B64: _SOP1Op_S_SETPC_B64, + SOP1Op.S_SWAPPC_B64: _SOP1Op_S_SWAPPC_B64, + SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64, SOP1Op.S_AND_SAVEEXEC_B64: _SOP1Op_S_AND_SAVEEXEC_B64, SOP1Op.S_OR_SAVEEXEC_B64: _SOP1Op_S_OR_SAVEEXEC_B64, SOP1Op.S_XOR_SAVEEXEC_B64: _SOP1Op_S_XOR_SAVEEXEC_B64, @@ -884,7 +943,7 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32, } -def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -901,7 +960,7 @@ def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # SCC = S1.u32 > S0.u32 ? 
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -918,7 +977,7 @@ def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 + S1.i32; # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -935,7 +994,7 @@ def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 - S1.i32; # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -952,7 +1011,7 @@ def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -969,7 +1028,7 @@ def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - SCC.u32; # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -986,7 +1045,7 @@ def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 S0 = Reg(s0) @@ -1000,7 +1059,7 @@ def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1014,7 +1073,7 @@ def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32; # D0.i32 = SCC ? 
S0.i32 : S1.i32 S0 = Reg(s0) @@ -1028,7 +1087,7 @@ def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1042,7 +1101,7 @@ def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1054,7 +1113,7 @@ def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = SCC ? S0.u64 : S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -1067,7 +1126,7 @@ def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1081,7 +1140,7 @@ def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1096,7 +1155,7 @@ def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1110,7 +1169,7 @@ def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1125,7 +1184,7 @@ def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result['d0_64'] = True return result -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1139,7 +1198,7 @@ def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1154,7 +1213,7 @@ def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1168,7 +1227,7 @@ def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1183,7 +1242,7 @@ def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1197,7 +1256,7 @@ def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1212,7 +1271,7 @@ def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1226,7 +1285,7 @@ def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1241,7 +1300,7 @@ def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B32(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1255,7 +1314,7 @@ def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1270,7 +1329,7 @@ def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1284,7 +1343,7 @@ def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1299,7 +1358,7 @@ def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1313,7 +1372,7 @@ def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 << S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1328,7 +1387,7 @@ def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 >> S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1342,7 +1401,7 @@ def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 >> S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1357,7 +1416,7 @@ def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -1371,7 +1430,7 @@ def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); # SCC = D0.i64 != 0LL S0 = Reg(s0) @@ -1386,7 +1445,7 @@ def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -1397,7 +1456,7 @@ def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -1409,7 +1468,7 @@ def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 * S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1420,7 +1479,7 @@ def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1434,7 +1493,7 @@ def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); # SCC = D0.i32 != 0 @@ -1451,7 +1510,7 @@ def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ((S0.u64 >> S1[5 : 
0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1466,7 +1525,7 @@ def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); # SCC = D0.i64 != 0LL @@ -1484,7 +1543,7 @@ def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32; # if D0.i32 < 0 then # D0.i32 = -D0.i32 @@ -1503,7 +1562,7 @@ def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -1514,7 +1573,7 @@ def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -1525,7 +1584,7 @@ def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1542,7 +1601,7 @@ def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1559,7 +1618,7 @@ def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1576,7 +1635,7 @@ def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1593,7 +1652,7 @@ def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -1604,7 +1663,7 @@ def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -1615,7 +1674,7 @@ def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -1680,7 +1739,7 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1691,7 +1750,7 @@ def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1702,7 +1761,7 @@ def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1713,7 +1772,7 @@ def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1724,7 +1783,7 @@ def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1735,7 +1794,7 @@ def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -1746,7 +1805,7 @@ def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1757,7 +1816,7 @@ def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1768,7 +1827,7 @@ def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1779,7 +1838,7 @@ def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1790,7 +1849,7 @@ def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1801,7 +1860,7 @@ def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -1812,7 +1871,7 @@ def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -1823,7 +1882,7 @@ def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -1834,7 +1893,7 @@ def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -1845,7 +1904,7 @@ def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -1856,7 +1915,7 @@ def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VSKIP = S0.u32[S1.u32[4 : 0]] S0 = Reg(s0) S1 = Reg(s1) @@ -1866,7 +1925,7 @@ def _SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': d0, 'scc': scc & 1} return result -def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = # VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. 
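    # Editorial note, illustration only: the pseudocode below packs the index
    # into M0[7:0]; on GFX9-era hardware the four enable bits land in
    # M0[15:12] (an assumption here, not shown in this hunk), so e.g. S0=5
    # with S1=0b0011 (VSRC0_REL|VSRC1_REL) would yield
    #   m0 = (0b0011 << 12) | 5  # == 0x3005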
# M0[7 : 0] = S0.u32[7 : 0].b8; @@ -1883,7 +1942,7 @@ def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 == S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -1894,7 +1953,7 @@ def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -1928,7 +1987,7 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -1938,7 +1997,7 @@ def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.i32 = 32'I(signext(S0.i16)) # endif @@ -1952,7 +2011,7 @@ def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1963,7 +2022,7 @@ def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 != 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1974,7 +2033,7 @@ def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1985,7 +2044,7 @@ def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 
S0.i32 >= 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -1996,7 +2055,7 @@ def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -2007,7 +2066,7 @@ def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= 32'I(signext(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -2018,7 +2077,7 @@ def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2029,7 +2088,7 @@ def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 != 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2040,7 +2099,7 @@ def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2051,7 +2110,7 @@ def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2062,7 +2121,7 @@ def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2073,7 +2132,7 @@ def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <= 32'U(S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -2084,7 +2143,7 @@ def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); @@ -2100,7 +2159,7 @@ def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -2110,6 +2169,22 @@ def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result +def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.i64 = PC + 4LL; + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + D0 = Reg(d0) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + D0.i64 = PC + 4 + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32, @@ -2127,9 +2202,10 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CMPK_LE_U32: _SOPKOp_S_CMPK_LE_U32, SOPKOp.S_ADDK_I32: _SOPKOp_S_ADDK_I32, SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32, + SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # for i in 0U : SIMM16.u16[3 : 0].u32 do # endfor SIMM16 = Reg(literal) @@ -2140,16 +2216,238 @@ def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _var result = {'d0': d0, 'scc': scc & 1} return result -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if SCC == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SCC = Reg(scc) + SIMM16 = Reg(literal) + PC = Reg(pc) 
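    # Editorial note, illustration only (not generated from the ISA pseudocode):
    # SCC is normally the output of a preceding s_cmp*/s_cmpk* op, and SIMM16
    # is a signed count of 4-byte words measured from the end of this 4-byte
    # instruction, so the taken branch target is
    #   target = pc + sign_extend_16(simm16) * 4 + 4
    # e.g. pc=0x100, simm16=-2 -> 0x100 - 8 + 4 = 0xfc. s_call_b64 above uses
    # the same formula and additionally returns the address pc + 4 in D0.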
+ # --- compiled pseudocode --- + if SCC == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if SCC == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SCC = Reg(scc) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if SCC == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # If VCCZ is 1 then jump to a constant offset relative to the current PC. + # if VCCZ.u1 == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + VCC = Reg(vcc) + SIMM16 = Reg(literal) + PC = Reg(pc) + VCCZ = Reg(1 if VCC._val == 0 else 0) + # --- compiled pseudocode --- + if VCCZ.u1 == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # If VCCZ is 0 then jump to a constant offset relative to the current PC. 
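    # Editorial note: VCCZ is not tracked as separate emulator state; it is
    # re-derived from the full 64-bit VCC mask (VCCZ = 1 iff VCC == 0),
    # mirroring the hardware status bit -- see the
    # VCCZ = Reg(1 if VCC._val == 0 else 0) line below.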
+ # if VCCZ.u1 == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + VCC = Reg(vcc) + SIMM16 = Reg(literal) + PC = Reg(pc) + VCCZ = Reg(1 if VCC._val == 0 else 0) + # --- compiled pseudocode --- + if VCCZ.u1 == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if EXECZ.u1 == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + EXEC = Reg(exec_mask) + SIMM16 = Reg(literal) + PC = Reg(pc) + EXECZ = Reg(1 if EXEC._val == 0 else 0) + # --- compiled pseudocode --- + if EXECZ.u1 == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if EXECZ.u1 == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + EXEC = Reg(exec_mask) + SIMM16 = Reg(literal) + PC = Reg(pc) + EXECZ = Reg(1 if EXEC._val == 0 else 0) + # --- compiled pseudocode --- + if EXECZ.u1 == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // PC passed into trap handler points to S_TRAP itself, + # PC = TBA.i64; # // trap base address + PC = Reg(pc) # --- compiled pseudocode --- # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if WAVE_STATUS.COND_DBG_SYS.u32 != 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 
4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if WAVE_STATUS.COND_DBG_USER.u32 != 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER): + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER): + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Get Doorbell ID 10 - Returns doorbell into EXEC, with the doorbell physical address in bits EXEC = Reg(exec_mask) # --- compiled pseudocode --- @@ -2161,11 +2459,22 @@ def _SOPPOp_S_SET_GPR_IDX_MODE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, + SOPPOp.S_BRANCH: _SOPPOp_S_BRANCH, + SOPPOp.S_CBRANCH_SCC0: _SOPPOp_S_CBRANCH_SCC0, + SOPPOp.S_CBRANCH_SCC1: _SOPPOp_S_CBRANCH_SCC1, + SOPPOp.S_CBRANCH_VCCZ: _SOPPOp_S_CBRANCH_VCCZ, + SOPPOp.S_CBRANCH_VCCNZ: _SOPPOp_S_CBRANCH_VCCNZ, + SOPPOp.S_CBRANCH_EXECZ: _SOPPOp_S_CBRANCH_EXECZ, + SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ, SOPPOp.S_TRAP: _SOPPOp_S_TRAP, + SOPPOp.S_CBRANCH_CDBGSYS: _SOPPOp_S_CBRANCH_CDBGSYS, + SOPPOp.S_CBRANCH_CDBGUSER: _SOPPOp_S_CBRANCH_CDBGUSER, + SOPPOp.S_CBRANCH_CDBGSYS_OR_USER: _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER, + SOPPOp.S_CBRANCH_CDBGSYS_AND_USER: _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER, SOPPOp.S_SET_GPR_IDX_MODE: _SOPPOp_S_SET_GPR_IDX_MODE, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -2175,7 +2484,7 @@ def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'I; # if EXEC == 0x0LL then # lane = 0; @@ -2199,7 +2508,7 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2209,7 +2518,7 @@ def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -2220,7 +2529,7 @@ def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -2230,7 +2539,7 @@ def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2240,7 +2549,7 @@ def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2250,7 +2559,7 @@ def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2260,7 +2569,7 @@ def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2270,7 +2579,7 @@ def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2280,7 +2589,7 @@ def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -2290,7 +2599,7 @@ def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -2300,7 +2609,7 @@ def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2310,7 +2619,7 @@ def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2321,7 +2630,7 @@ def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2331,7 +2640,7 @@ def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2341,7 +2650,7 @@ def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2351,7 +2660,7 @@ def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2361,7 +2670,7 @@ def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2371,7 +2680,7 @@ def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -2382,7 +2691,7 @@ def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2393,7 +2702,7 @@ def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -2409,7 +2718,7 @@ def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -2425,7 +2734,7 @@ def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -2441,7 +2750,7 @@ def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 
S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2451,7 +2760,7 @@ def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2461,7 +2770,7 @@ def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -2476,7 +2785,7 @@ def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -2491,7 +2800,7 @@ def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -2506,7 +2815,7 @@ def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2516,7 +2825,7 @@ def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2526,7 +2835,7 @@ def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -2536,7 +2845,7 @@ def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -2547,7 +2856,7 @@ def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2557,7 +2866,7 @@ def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -2568,7 +2877,7 @@ def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2579,7 +2888,7 @@ def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -2589,7 +2898,7 @@ def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2600,7 +2909,7 @@ def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -2610,7 +2919,7 @@ def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -2620,7 +2929,7 @@ def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -2630,7 +2939,7 @@ def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -2640,7 +2949,7 @@ def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -2660,7 +2969,7 @@ def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -2680,7 +2989,7 @@ def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -2700,7 +3009,7 @@ def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -2717,7 +3026,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -2735,7 +3044,7 @@ def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -2746,7 +3055,7 @@ def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR 
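# Editorial note on the v_frexp_* ops in the surrounding hunks (illustration
# only): mantissa and exponent follow the C frexp() convention, mant in
# [0.5, 1.0) (negated for negative inputs) with x == mant * 2**exp, matching
# Python's math.frexp:
#   import math
#   math.frexp(8.0)  # -> (0.5, 4), i.e. 8.0 == 0.5 * 2**4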
result['d0_64'] = True return result -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -2763,7 +3072,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -2780,7 +3089,7 @@ def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -2791,7 +3100,7 @@ def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -2801,7 +3110,7 @@ def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -2811,7 +3120,7 @@ def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2821,7 +3130,7 @@ def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2831,7 +3140,7 @@ def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 
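    # Editorial note, illustration only: the f16 ops read and write the low 16
    # bits of the 32-bit lane value, so e.g. s0=0x4000 (2.0 as f16) produces
    # d0=0x3800 (0.5 as f16).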
S0 = Reg(s0) D0 = Reg(d0) @@ -2841,7 +3150,7 @@ def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2851,7 +3160,7 @@ def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2861,7 +3170,7 @@ def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2871,7 +3180,7 @@ def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2881,7 +3190,7 @@ def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -2898,7 +3207,7 @@ def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -2915,7 +3224,7 @@ def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -2930,7 +3239,7 @@ def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -2945,7 +3254,7 @@ def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2955,7 +3264,7 @@ def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -2970,7 +3279,7 @@ def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -2980,7 +3289,7 @@ def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -2990,7 +3299,7 @@ def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3000,7 +3309,7 @@ def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_snorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3010,7 +3319,7 @@ def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_unorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3020,11 +3329,7 @@ def 
_VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # if n <= 16'0 then - # elsif n >= 16'255 then - # else - # endif); +def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 16'0; # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); @@ -3033,12 +3338,6 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) tmp = Reg(0) # --- compiled pseudocode --- - if n <= 0: - pass - elif n >= 255: - pass - else: - pass tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) @@ -3047,7 +3346,7 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.b32; # D0.b32 = S0.b32; # S0.b32 = tmp @@ -3062,7 +3361,7 @@ def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SDWA_SRC0_SEL == BYTE1.b3 then # D0.f32 = fp8_to_f32(S0[15 : 8].fp8) # elsif SDWA_SRC0_SEL == BYTE2.b3 then @@ -3089,7 +3388,7 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SDWA_SRC0_SEL == BYTE1.b3 then # D0.f32 = bf8_to_f32(S0[15 : 8].bf8) # elsif SDWA_SRC0_SEL == BYTE2.b3 then @@ -3116,7 +3415,7 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? S0[31 : 16] : S0[15 : 0]; # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) @@ -3133,7 +3432,7 @@ def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? 
S0[31 : 16] : S0[15 : 0]; # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) @@ -3150,7 +3449,7 @@ def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # for pass in 0 : 1 do # for lane in 0 : 15 do # tmp = VGPR[pass * 32 + lane][SRC0.u32]; @@ -3166,7 +3465,7 @@ def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # for lane in 0 : 31 do # tmp = VGPR[lane][SRC0.u32]; # endfor @@ -3179,7 +3478,7 @@ def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 32'F({ S0.b16, 16'0U }) # V_CMPX_{COMPF}_F16 16-bit float compare. Also writes EXEC. 0x30 to 0x3F # V_CMPX_{COMPF}_F32 32-bit float compare. Also writes EXEC. 0x50 to 0x5F @@ -3283,7 +3582,7 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_F32_BF16: _VOP1Op_V_CVT_F32_BF16, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -3297,7 +3596,7 @@ def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3308,7 +3607,7 @@ def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3319,7 +3618,7 @@ def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3330,7 +3629,7 @@ def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, D0.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -3342,7 +3641,7 @@ def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3353,7 +3652,7 @@ def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -3364,7 +3663,7 @@ def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -3375,7 +3674,7 @@ def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -3386,7 +3685,7 @@ def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -3397,7 +3696,7 @@ def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -3435,7 +3734,7 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -3477,7 +3776,7 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -3488,7 +3787,7 @@ def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -3499,7 +3798,7 @@ def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -3510,7 +3809,7 @@ def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -3521,7 +3820,7 @@ def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3532,7 +3831,7 @@ def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3543,7 +3842,7 @@ def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3554,7 +3853,7 @@ def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3565,7 +3864,7 @@ def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3576,7 +3875,7 @@ def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3587,7 +3886,7 @@ def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -3605,7 +3904,7 @@ def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -3617,7 +3916,7 @@ def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -3629,7 +3928,7 @@ def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. @@ -3649,7 +3948,7 @@ def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3669,7 +3968,7 @@ def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3689,7 +3988,7 @@ def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. @@ -3709,7 +4008,7 @@ def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 
1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3729,7 +4028,7 @@ def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. @@ -3749,7 +4048,7 @@ def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3760,7 +4059,7 @@ def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3771,7 +4070,7 @@ def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3782,7 +4081,7 @@ def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3793,7 +4092,7 @@ def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + D0.f16; # if OPSEL.u4[3] then # D0 = { tmp.f16, D0[15 : 0] } @@ -3814,7 +4113,7 @@ def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * SIMM16.f16 + S1.f16; S0 = Reg(s0) S1 = Reg(s1) @@ -3826,7 +4125,7 @@ def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 
'scc': scc & 1} return result -def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + SIMM16.f16; S0 = Reg(s0) S1 = Reg(s1) @@ -3838,7 +4137,7 @@ def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3849,7 +4148,7 @@ def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3860,7 +4159,7 @@ def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S1.u16 - S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3871,7 +4170,7 @@ def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -3882,7 +4181,7 @@ def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3893,7 +4192,7 @@ def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3904,7 +4203,7 @@ def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -3915,7 +4214,7 @@ def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -3957,7 +4256,7 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -3995,7 +4294,7 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -4006,7 +4305,7 @@ def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -4017,7 +4316,7 @@ def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -4028,7 +4327,7 @@ def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? 
S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -4039,7 +4338,7 @@ def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -4050,7 +4349,7 @@ def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4061,7 +4360,7 @@ def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4072,7 +4371,7 @@ def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4083,7 +4382,7 @@ def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -4101,7 +4400,7 @@ def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); @@ -4119,7 +4418,7 @@ def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); @@ -4141,7 +4440,7 @@ def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); @@ -4171,7 +4470,7 @@ def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4182,7 +4481,7 @@ def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) S0 = Reg(s0) @@ -4195,7 +4494,7 @@ def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4271,7 +4570,7 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_XNOR_B32: _VOP2Op_V_XNOR_B32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; @@ -4289,7 +4588,7 @@ def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -4305,7 +4604,7 @@ def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; @@ -4322,7 +4621,7 @@ def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; @@ -4339,7 +4638,7 @@ def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -4355,7 +4654,7 @@ def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -4371,7 +4670,7 @@ def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -4387,7 +4686,7 @@ def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; @@ -4404,7 +4703,7 @@ def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? 
S0[31 : 16].i16 : S1[31 : 16].i16; @@ -4421,7 +4720,7 @@ def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; @@ -4439,7 +4738,7 @@ def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; @@ -4456,7 +4755,7 @@ def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; @@ -4473,7 +4772,7 @@ def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; @@ -4490,7 +4789,7 @@ def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; @@ -4507,7 +4806,7 @@ def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); @@ -4525,7 +4824,7 @@ def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; @@ -4542,7 +4841,7 @@ def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; @@ -4559,7 +4858,7 @@ def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -4576,7 +4875,7 @@ def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -4593,7 +4892,7 @@ def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 32'F(S0[15 : 0].bf16) * 32'F(S1[15 : 0].bf16); # tmp += 32'F(S0[31 : 16].bf16) * 32'F(S1[31 : 16].bf16); # tmp += S2.f32; @@ -4612,7 +4911,7 @@ def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = 16'F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); # tmp[15 : 0].f16 = 16'F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); # D0.b32 = tmp.b32 @@ -4629,7 +4928,7 @@ def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = 16'F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); # tmp[15 : 0].f16 = 16'F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); # D0.b32 = tmp.b32 @@ -4646,7 +4945,7 @@ def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -4665,7 +4964,7 @@ def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.i32; # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); @@ -4684,7 +4983,7 @@ def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16); # tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16); @@ -4703,7 +5002,7 @@ def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.i32; # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); @@ -4726,7 +5025,7 @@ def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, 
pc=0): # tmp = S2.u32; # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); @@ -4749,7 +5048,7 @@ def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.i32; # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); @@ -4780,7 +5079,7 @@ def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); @@ -4811,7 +5110,7 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 64'B; # tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32); # tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32); @@ -4830,7 +5129,7 @@ def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 64'B; # tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32; # tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32; @@ -4848,7 +5147,7 @@ def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 64'B; # tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32; # tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32; @@ -4866,7 +5165,7 @@ def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3POp_V_PK_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32]; # tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32]; # D0.u32[31 : 0] = tmp0.u32; @@ -4920,7 +5219,7 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MOV_B32: _VOP3POp_V_PK_MOV_B32, } -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -4957,6 +5256,7 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -4976,9 +5276,11 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -5035,7 +5337,7 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -5072,6 +5374,7 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -5091,9 +5394,11 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -5150,7 +5455,7 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
@@ -5187,6 +5492,7 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -5206,9 +5512,11 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -5265,13 +5573,14 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -5279,9 +5588,11 @@ def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. @@ -5290,6 +5601,7 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- @@ -5297,9 +5609,11 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. @@ -5308,6 +5622,7 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- @@ -5315,9 +5630,11 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5325,6 +5642,7 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- @@ -5332,9 +5650,11 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. @@ -5343,6 +5663,7 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- @@ -5350,9 +5671,11 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. 
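Reg.u64[laneId] = x in the pseudocode is a masked read-modify-write on a 64-bit wave mask, and (D0._val >> lane) & 1 in each result block is the matching extraction. Stripped of the Reg wrapper, a sketch:

def set_lane(mask: int, lane: int, bit: bool) -> int:
    # Clear, then set, bit `lane` of a 64-bit mask.
    return (mask & ~(1 << lane)) | (int(bit) << lane)

def get_lane(mask: int, lane: int) -> int:
    # Same extraction the helpers use for result['vcc_lane'].
    return (mask >> lane) & 1

m = set_lane(0, 5, True)
assert get_lane(m, 5) == 1 and m == 0b100000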
S0 = Reg(s0) @@ -5360,6 +5683,7 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- @@ -5367,9 +5691,11 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5377,6 +5703,7 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- @@ -5384,9 +5711,11 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -5395,6 +5724,7 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- @@ -5402,9 +5732,11 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
@@ -5413,6 +5745,7 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- @@ -5420,9 +5753,11 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5431,6 +5766,7 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- @@ -5438,9 +5774,11 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5449,6 +5787,7 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- @@ -5456,9 +5795,11 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
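V_CMP_O and V_CMP_U reduce to "both inputs non-NaN" and "either input NaN" respectively; they carry no magnitude test at all. A quick check with Python floats (f16 narrowing does not change the NaN logic):

import math

nan = float("nan")
assert (not math.isnan(nan) and not math.isnan(1.0)) is False  # v_cmp_o
assert (math.isnan(nan) or math.isnan(1.0)) is True            # v_cmp_u
assert (not math.isnan(2.0) and not math.isnan(1.0)) is True   # ordered pair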
# D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -5468,6 +5809,7 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- @@ -5475,9 +5817,11 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5486,6 +5830,7 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- @@ -5493,9 +5838,11 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -5505,6 +5852,7 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- @@ -5512,9 +5860,11 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
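The NGE/NLG/NGT/NLE/NEQ/NLT forms are genuine negations, not operator swaps, which is what the recurring "With NAN inputs this is not the same operation as ..." comments warn about: any compare against NaN is false, so its negation is true. Concretely:

nan = float("nan")
a, b = nan, 1.0
assert (a < b) is False           # plain v_cmp_lt: unordered compares fail
assert (not (a >= b)) is True     # v_cmp_nge: negation flips the NaN case
assert (a < b) != (not (a >= b))  # so nge is not lt once NaN is involved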
Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -5524,6 +5874,7 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- @@ -5531,15 +5882,18 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -5547,9 +5901,11 @@ def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -5557,6 +5913,7 @@ def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -5565,9 +5922,11 @@ def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. 
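The CMPX variants differ from the plain compares in exactly one place: the chained assignment EXEC.u64[laneId] = D0.u64[laneId] = cond, so a lane whose test fails masks itself out of subsequent instructions, and the result block reports both bits (exec_lane and vcc_lane). A per-lane sketch of that double write:

def cmpx_lane(exec_mask: int, d0: int, lane: int, cond: bool):
    # EXEC.u64[laneId] = D0.u64[laneId] = cond -- both masks take the bit.
    bit = 1 << lane
    exec_mask = (exec_mask & ~bit) | (bit if cond else 0)
    d0 = (d0 & ~bit) | (bit if cond else 0)
    return exec_mask, d0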
@@ -5577,6 +5936,7 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- @@ -5585,9 +5945,11 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. @@ -5597,6 +5959,7 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- @@ -5605,9 +5968,11 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5616,6 +5981,7 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- @@ -5624,9 +5990,11 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. 
@@ -5636,6 +6004,7 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- @@ -5644,9 +6013,11 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5655,6 +6026,7 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- @@ -5663,9 +6035,11 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5674,6 +6048,7 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- @@ -5682,9 +6057,11 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
@@ -5694,6 +6071,7 @@ def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- @@ -5702,9 +6080,11 @@ def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5713,6 +6093,7 @@ def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- @@ -5721,9 +6102,11 @@ def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5733,6 +6116,7 @@ def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- @@ -5741,9 +6125,11 @@ def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
@@ -5753,6 +6139,7 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- @@ -5761,9 +6148,11 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. @@ -5773,6 +6162,7 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- @@ -5781,9 +6171,11 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5793,6 +6185,7 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- @@ -5801,9 +6194,11 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. 
# EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -5814,6 +6209,7 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- @@ -5822,9 +6218,11 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -5835,6 +6233,7 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- @@ -5843,9 +6242,11 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -5853,6 +6254,7 @@ def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -5861,15 +6263,18 @@ def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
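Each helper returns a per-lane dict (vcc_lane, optionally exec_lane, plus d0_64 and new_pc), so something above these functions has to OR the bits back into wave-wide masks. A plausible reduction loop, with names assumed rather than taken from the emulator:

def reduce_vopc(op, active: int, nlanes: int = 32) -> int:
    # OR each active lane's condition bit into a wave-wide VCC value.
    vcc = 0
    for lane in range(nlanes):
        if (active >> lane) & 1:
            r = op(lane)  # one of the _VOPCOp_* helpers, partially applied
            vcc = (vcc & ~(1 << lane)) | (r['vcc_lane'] << lane)
    return vcc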
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -5877,9 +6282,11 @@ def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -5888,6 +6295,7 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- @@ -5895,9 +6303,11 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -5906,6 +6316,7 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- @@ -5913,9 +6324,11 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5923,6 +6336,7 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- @@ -5930,9 +6344,11 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -5941,6 +6357,7 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- @@ -5948,9 +6365,11 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5958,6 +6377,7 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- @@ -5965,9 +6385,11 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5975,6 +6397,7 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- @@ -5982,9 +6405,11 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -5993,6 +6418,7 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- @@ -6000,9 +6426,11 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -6011,6 +6439,7 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- @@ -6018,9 +6447,11 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
@@ -6029,6 +6460,7 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- @@ -6036,9 +6468,11 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -6047,6 +6481,7 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- @@ -6054,9 +6489,11 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -6066,6 +6503,7 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -6073,9 +6511,11 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
@@ -6084,6 +6524,7 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -6091,9 +6532,11 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -6103,6 +6546,7 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -6110,9 +6554,11 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -6122,6 +6568,7 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -6129,15 +6576,18 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -6145,9 +6595,11 @@ def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -6155,6 +6607,7 @@ def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -6163,9 +6616,11 @@ def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -6175,6 +6630,7 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- @@ -6183,9 +6639,11 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. 
@@ -6195,6 +6653,7 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- @@ -6203,9 +6662,11 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6214,6 +6675,7 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- @@ -6222,9 +6684,11 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -6234,6 +6698,7 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- @@ -6242,9 +6707,11 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -6253,6 +6720,7 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- @@ -6261,9 +6729,11 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6272,6 +6742,7 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- @@ -6280,9 +6751,11 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -6292,6 +6765,7 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- @@ -6300,9 +6774,11 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -6311,6 +6787,7 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- @@ -6319,9 +6796,11 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -6331,6 +6810,7 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- @@ -6339,9 +6819,11 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -6351,6 +6833,7 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- @@ -6359,9 +6842,11 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. 
@@ -6371,6 +6856,7 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -6379,9 +6865,11 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -6391,6 +6879,7 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -6399,9 +6888,11 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -6412,6 +6903,7 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -6420,9 +6912,11 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. 
# EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -6433,6 +6927,7 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -6441,9 +6936,11 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -6451,6 +6948,7 @@ def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -6459,15 +6957,18 @@ def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -6475,9 +6976,11 @@ def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. 
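The new epilogue is the same two lines in every handler: reinterpret the unsigned 64-bit PC._val as a signed two's-complement value before reporting it as new_pc. A standalone equivalent of that conditional:

def to_signed64(u: int) -> int:
  # two's-complement reinterpretation of an unsigned 64-bit value
  return u if u < 0x8000000000000000 else u - 0x10000000000000000

assert to_signed64(0x100) == 0x100            # ordinary addresses pass through
assert to_signed64(0xfffffffffffffff8) == -8  # wrapped values come back negative

VOPC handlers never write PC between `PC = Reg(pc)` and this epilogue, so for the compares new_pc just echoes the pc argument; presumably the epilogue is emitted uniformly so branch and non-branch handlers return the same result shape.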
@@ -6486,6 +6989,7 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -6493,9 +6997,11 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -6504,6 +7010,7 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -6511,9 +7018,11 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6521,6 +7030,7 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -6528,9 +7038,11 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. 
@@ -6539,6 +7051,7 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -6546,9 +7059,11 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6556,6 +7071,7 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -6563,9 +7079,11 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6573,6 +7091,7 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -6580,9 +7099,11 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
@@ -6591,6 +7112,7 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -6598,9 +7120,11 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -6609,6 +7133,7 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -6616,9 +7141,11 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -6627,6 +7154,7 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -6634,9 +7162,11 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
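The O/U/N* comment lines above are all about IEEE-754 NaN behavior: O ("ordered") is true iff neither input is NaN, U ("unordered") iff either is, and each negated compare (NGE, NLG, NGT, ...) differs from its non-negated counterpart exactly when a NaN is involved. Plain Python shows the split with no emulator required:

import math
x, nan = 1.0, float('nan')
assert not math.isnan(x) and not math.isnan(x)  # V_CMP_O_F64 on (x, x): ordered -> 1
assert math.isnan(nan) or math.isnan(x)         # V_CMP_U_F64 on (nan, x): unordered -> 1
assert not (nan >= x)                           # V_CMP_NGE_F64 on (nan, x) -> 1 ...
assert not (nan < x)                            # ... while V_CMP_LT_F64 -> 0: the "not the same as <" note
assert nan != nan                               # Python's != is an unordered not-equal on NaN pairs

That last line is worth remembering when reading the `<>` compares, which are compiled to Python's `!=`: if `.f64` yields a plain Python float, the LG/NLG family on NaN inputs looks like a good candidate for explicit test_emu.py coverage.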
@@ -6645,6 +7175,7 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -6652,9 +7183,11 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -6664,6 +7197,7 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -6671,9 +7205,11 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -6682,6 +7218,7 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -6689,9 +7226,11 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -6701,6 +7240,7 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -6708,9 +7248,11 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -6720,6 +7262,7 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -6727,15 +7270,18 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -6743,9 +7289,11 @@ def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
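Because each handler is a pure function of snapshot integers, the F64 compares can be exercised directly by passing the raw 64-bit pattern of each operand. A REPL-style check, assuming the generated module is importable (its path sits outside this hunk):

import struct
nan64 = struct.unpack('<Q', struct.pack('<d', float('nan')))[0]
r = _VOPCOp_V_CMP_NEQ_F64(nan64, nan64, 0, 0, 0, 0, 0, 1, 0, None, {}, pc=0x20)
assert r['vcc_lane'] == 1                    # NaN != NaN is an unordered "not equal": true
assert r['d0_64'] and 'exec_lane' not in r   # plain V_CMP writes VCC/SGPR, never EXEC
assert r['new_pc'] == 0x20                   # PC untouched, just reported back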
@@ -6753,6 +7301,7 @@ def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -6761,9 +7310,11 @@ def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -6773,6 +7324,7 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -6781,9 +7333,11 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -6793,6 +7347,7 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -6801,9 +7356,11 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -6812,6 +7369,7 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -6820,9 +7378,11 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -6832,6 +7392,7 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -6840,9 +7401,11 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6851,6 +7414,7 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -6859,9 +7423,11 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -6870,6 +7436,7 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -6878,9 +7445,11 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -6890,6 +7459,7 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -6898,9 +7468,11 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6909,6 +7481,7 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -6917,9 +7490,11 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
@@ -6929,6 +7504,7 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -6937,9 +7513,11 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -6949,6 +7527,7 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -6957,9 +7536,11 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. @@ -6969,6 +7550,7 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -6977,9 +7559,11 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
@@ -6989,6 +7573,7 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -6997,9 +7582,11 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -7010,6 +7597,7 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -7018,9 +7606,11 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -7031,6 +7621,7 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -7039,9 +7630,11 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
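Each V_CMPX handler reports a single lane's bit for both masks, so some outer loop has to fold those bits back into the wave-wide EXEC and VCC. A sketch of that loop under assumed names (the real one lives in emu.py and may differ):

def fold_vopc(handler, src0, src1, exec_mask, vcc, pc, wave_size=32):
  active = exec_mask                        # snapshot: the compare reads the pre-op EXEC
  for lane in range(wave_size):
    if not (active >> lane) & 1: continue   # inactive lanes don't compare
    r = handler(src0[lane], src1[lane], 0, vcc, 0, vcc, lane, active, 0, None, {}, pc=pc)
    vcc = (vcc & ~(1 << lane)) | (r['vcc_lane'] << lane)
    if 'exec_lane' in r:                    # only the CMPX forms write EXEC
      exec_mask = (exec_mask & ~(1 << lane)) | (r['exec_lane'] << lane)
  return exec_mask, vcc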
@@ -7049,6 +7642,7 @@ def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7057,15 +7651,18 @@ def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7073,9 +7670,11 @@ def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -7084,6 +7683,7 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -7091,9 +7691,11 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -7102,6 +7704,7 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -7109,9 +7712,11 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7119,6 +7724,7 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -7126,9 +7732,11 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -7137,6 +7745,7 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -7144,9 +7753,11 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
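From here the same template repeats for 16-bit integer operands, where the only difference between the I16 family and the U16 family that follows is the signedness of the 16-bit reinterpretation. One self-contained check of the distinction:

import struct
def as_i16(u: int) -> int:
  # reinterpret the low 16 bits as a signed short
  return struct.unpack('<h', struct.pack('<H', u & 0xffff))[0]

a, b = 0xffff, 0x0001
assert as_i16(a) < as_i16(b)  # V_CMP_LT_I16: 0xffff is -1, and -1 < 1
assert not (a < b)            # V_CMP_LT_U16: 65535 < 1 is false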
@@ -7155,6 +7766,7 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -7162,9 +7774,11 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7172,6 +7786,7 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -7179,15 +7794,18 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7195,15 +7813,18 @@ def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7211,9 +7832,11 @@ def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -7222,6 +7845,7 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -7229,9 +7853,11 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. @@ -7240,6 +7866,7 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -7247,9 +7874,11 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -7257,6 +7886,7 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -7264,9 +7894,11 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -7275,6 +7907,7 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -7282,9 +7915,11 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. @@ -7293,6 +7928,7 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -7300,9 +7936,11 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -7310,6 +7948,7 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -7317,15 +7956,18 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7333,9 +7975,11 @@ def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -7343,6 +7987,7 @@ def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7351,9 +7996,11 @@ def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. 
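The F/T variants ignore their operands and write a constant bit per lane, so they can materialize a known-empty or known-full mask through the ordinary compare path. Called directly, with the same importability caveat as above:

r = _VOPCOp_V_CMP_F_U16(0, 0, 0, 0, 0, 0, 5, 0, 0, None, {})
assert r['vcc_lane'] == 0  # lane 5 reports 0 regardless of the sources
r = _VOPCOp_V_CMP_T_U16(0, 0, 0, 0, 0, 0, 5, 0, 0, None, {})
assert r['vcc_lane'] == 1  # lane 5 reports 1 regardless of the sources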
@@ -7363,6 +8010,7 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -7371,9 +8019,11 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. @@ -7383,6 +8033,7 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -7391,9 +8042,11 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7402,6 +8055,7 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -7410,9 +8064,11 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -7422,6 +8078,7 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -7430,9 +8087,11 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. @@ -7442,6 +8101,7 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -7450,9 +8110,11 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7461,6 +8123,7 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -7469,9 +8132,11 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
@@ -7479,6 +8144,7 @@ def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7487,9 +8153,11 @@ def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -7497,6 +8165,7 @@ def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7505,9 +8174,11 @@ def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -7517,6 +8188,7 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -7525,9 +8197,11 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -7537,6 +8211,7 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -7545,9 +8220,11 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7556,6 +8233,7 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -7564,9 +8242,11 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -7576,6 +8256,7 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -7584,9 +8265,11 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -7596,6 +8279,7 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -7604,9 +8288,11 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7615,6 +8301,7 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -7623,9 +8310,11 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -7633,6 +8322,7 @@ def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7641,15 +8331,18 @@ def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
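For readers skimming the generated bodies: in VOPC encodings D0 names the 64-bit wave mask (VCC unless a scalar destination is given), and `D0.u64[laneId] = cond` sets or clears exactly one bit of it. A rough equivalent over plain Python ints, assuming `Reg.u64[...]` acts as a per-bit setter (`set_lane_bit` is illustrative, not part of pcode.py):

def set_lane_bit(mask: int, lane: int, cond: bool) -> int:
    # Clear the lane's bit, then set it when the comparison holds.
    mask &= ~(1 << lane)
    if cond: mask |= 1 << lane
    return mask & 0xFFFFFFFFFFFFFFFF  # keep the mask at 64 bits

assert set_lane_bit(0, 3, True) == 0b1000
assert set_lane_bit(0xF, 1, False) == 0b1101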
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7657,9 +8350,11 @@ def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -7668,6 +8363,7 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -7675,9 +8371,11 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. @@ -7686,6 +8384,7 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -7693,9 +8392,11 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -7703,6 +8404,7 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -7710,9 +8412,11 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -7721,6 +8425,7 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -7728,9 +8433,11 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. @@ -7739,6 +8446,7 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -7746,9 +8454,11 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -7756,6 +8466,7 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -7763,15 +8474,18 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7779,15 +8493,18 @@ def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7795,9 +8512,11 @@ def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -7806,6 +8525,7 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -7813,9 +8533,11 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -7824,6 +8546,7 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -7831,9 +8554,11 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7841,6 +8566,7 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -7848,9 +8574,11 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -7859,6 +8587,7 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -7866,9 +8595,11 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -7877,6 +8608,7 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -7884,9 +8616,11 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7894,6 +8628,7 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -7901,15 +8636,18 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -7917,9 +8655,11 @@ def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -7927,6 +8667,7 @@ def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -7935,9 +8676,11 @@ def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -7947,6 +8690,7 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -7955,9 +8699,11 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
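The V_CMPX_* handlers differ from their V_CMP_* twins only in also committing the per-lane bit to EXEC, which is why they report both `exec_lane` and `vcc_lane`. A hedged sketch of the chained store `EXEC.u64[laneId] = D0.u64[laneId] = cond`, reusing the illustrative `set_lane_bit` above:

def cmpx_lane(exec_mask: int, d0: int, lane: int, cond: bool) -> tuple[int, int]:
    # CMPX writes the same lane bit into both the destination mask (VCC in
    # VOPC encoding) and EXEC, so a failed compare disables the lane for
    # subsequent instructions.
    return set_lane_bit(exec_mask, lane, cond), set_lane_bit(d0, lane, cond)

exec_mask, vcc = cmpx_lane(0xFFFFFFFFFFFFFFFF, 0, 5, False)
assert (exec_mask >> 5) & 1 == 0 and (vcc >> 5) & 1 == 0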
@@ -7967,6 +8713,7 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -7975,9 +8722,11 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -7986,6 +8735,7 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -7994,9 +8744,11 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -8006,6 +8758,7 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -8014,9 +8767,11 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -8026,6 +8781,7 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -8034,9 +8790,11 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8045,6 +8803,7 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -8053,9 +8812,11 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -8063,6 +8824,7 @@ def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8071,9 +8833,11 @@ def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
@@ -8081,6 +8845,7 @@ def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8089,9 +8854,11 @@ def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. @@ -8101,6 +8868,7 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -8109,9 +8877,11 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -8121,6 +8891,7 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -8129,9 +8900,11 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -8140,6 +8913,7 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -8148,9 +8922,11 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -8160,6 +8936,7 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -8168,9 +8945,11 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -8180,6 +8959,7 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -8188,9 +8968,11 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -8199,6 +8981,7 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -8207,9 +8990,11 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -8217,6 +9002,7 @@ def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8225,15 +9011,18 @@ def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8241,9 +9030,11 @@ def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. 
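From here the generator moves on to the 64-bit integer compares. The `.i64` accessor reads the raw register pair as a signed value, so the ordering flips for operands with the top bit set; a sketch using the `to_signed64` fold from above:

def cmp_lt_i64(s0: int, s1: int) -> bool:
    # S0.i64 < S1.i64: reinterpret both raw 64-bit values as signed
    # two's-complement before comparing.
    return to_signed64(s0) < to_signed64(s1)

assert cmp_lt_i64(0xFFFFFFFFFFFFFFFF, 1)   # -1 < 1 holds
assert not (0xFFFFFFFFFFFFFFFF < 1)        # a naive unsigned compare gets it wrong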
@@ -8252,6 +9043,7 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -8259,9 +9051,11 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -8270,6 +9064,7 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -8277,9 +9072,11 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8287,6 +9084,7 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -8294,9 +9092,11 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -8305,6 +9105,7 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -8312,9 +9113,11 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -8323,6 +9126,7 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -8330,9 +9134,11 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8340,6 +9146,7 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -8347,15 +9154,18 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8363,15 +9173,18 @@ def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8379,9 +9192,11 @@ def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -8390,6 +9205,7 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -8397,9 +9213,11 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -8408,6 +9226,7 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -8415,9 +9234,11 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8425,6 +9246,7 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -8432,9 +9254,11 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. @@ -8443,6 +9267,7 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -8450,9 +9275,11 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -8461,6 +9288,7 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -8468,9 +9296,11 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8478,6 +9308,7 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -8485,15 +9316,18 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8501,9 +9335,11 @@ def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
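Each compare writes a single bit of a 64-bit wave mask via D0.u64[laneId] = cond. A sketch of what that item-assignment has to lower to, assuming Reg implements it with plain integer bit ops (set_lane_bit is a hypothetical helper, not from this file):

def set_lane_bit(mask: int, lane: int, cond: bool) -> int:
    # clear this lane's bit, then OR in the condition
    return (mask & ~(1 << lane)) | (int(cond) << lane)

d0 = set_lane_bit(0, 5, True)
assert (d0 >> 5) & 1 == 1  # the same extraction the handlers use for 'vcc_lane'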
@@ -8511,6 +9347,7 @@ def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8519,9 +9356,11 @@ def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -8531,6 +9370,7 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -8539,9 +9379,11 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -8551,6 +9393,7 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -8559,9 +9402,11 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -8570,6 +9415,7 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -8578,9 +9424,11 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -8590,6 +9438,7 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -8598,9 +9447,11 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -8610,6 +9461,7 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -8618,9 +9470,11 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -8629,6 +9483,7 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -8637,9 +9492,11 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -8647,6 +9504,7 @@ def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -8655,9 +9513,11 @@ def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -8665,6 +9525,7 @@ def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -8673,9 +9534,11 @@ def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -8685,6 +9548,7 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -8693,9 +9557,11 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -8705,6 +9571,7 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -8713,9 +9580,11 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8724,6 +9593,7 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -8732,9 +9602,11 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -8744,6 +9616,7 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -8752,9 +9625,11 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -8764,6 +9639,7 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -8772,9 +9648,11 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -8783,6 +9661,7 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -8791,9 +9670,11 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
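The CMPX variants additionally fold the condition into EXEC and report it back per lane as 'exec_lane' next to 'vcc_lane'. A sketch of how a caller could merge those per-lane result dicts into wave-level masks; this is illustrative only, under the assumption that the interpreter applies the keys bit-by-bit, and it is not the actual loop in emu.py:

def apply_lane_results(results: dict[int, dict], vcc: int, exec_mask: int) -> tuple[int, int]:
    # results maps lane -> dict returned by a _VOPCOp_* handler above
    for lane, r in results.items():
        if 'vcc_lane' in r: vcc = (vcc & ~(1 << lane)) | (r['vcc_lane'] << lane)
        if 'exec_lane' in r: exec_mask = (exec_mask & ~(1 << lane)) | (r['exec_lane'] << lane)
    return vcc, exec_mask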
@@ -8811,6 +9692,7 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC = Reg(exec_mask) tmp = Reg(0) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) @@ -8828,6 +9710,8 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result VOPCOp_FUNCTIONS = { @@ -9031,7 +9915,7 @@ VOPCOp_FUNCTIONS = { VOPCOp.V_CMPX_T_U64: _VOPCOp_V_CMPX_T_U64, } -def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -9068,6 +9952,7 @@ def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -9086,9 +9971,11 @@ def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -9144,7 +10031,7 @@ def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
@@ -9181,6 +10068,7 @@ def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -9199,9 +10087,11 @@ def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -9257,7 +10147,7 @@ def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -9294,6 +10184,7 @@ def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -9312,9 +10203,11 @@ def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -9370,22 +10263,25 @@ def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3AOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
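The CLASS compares test S1's class mask against the classification of S0, and the first two classes split signaling from quiet NaNs. For f32 that split is a single mantissa bit; a sketch of the IEEE-754 bit tests implied by isSignalNAN/isQuietNAN (helper names are ours):

import math, struct

def f32_bits(x: float) -> int:
    return struct.unpack('<I', struct.pack('<f', x))[0]

def is_nan_f32(bits: int) -> bool:
    # NaN: exponent all ones, mantissa nonzero
    return (bits >> 23) & 0xff == 0xff and bits & 0x7fffff != 0

def is_quiet_nan_f32(bits: int) -> bool:
    # quiet NaNs carry the top mantissa bit; signaling NaNs leave it clear
    return is_nan_f32(bits) and bool(bits & 0x400000)

assert is_quiet_nan_f32(f32_bits(math.nan))  # Python's nan is a quiet NaN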
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. @@ -9394,15 +10290,18 @@ def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. @@ -9411,15 +10310,18 @@ def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9427,15 +10329,18 @@ def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input.
Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. @@ -9444,15 +10349,18 @@ def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9460,15 +10368,18 @@ def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9476,15 +10387,18 @@ def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
@@ -9493,15 +10407,18 @@ def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -9510,15 +10427,18 @@ def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -9527,15 +10447,18 @@ def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -9544,15 +10467,18 @@ def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -9562,15 +10488,18 @@ def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -9579,15 +10508,18 @@ def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -9597,15 +10529,18 @@ def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input.
Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -9615,30 +10550,36 @@ def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -9646,6 +10587,7 @@ def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -9653,9 +10595,11 @@ def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. 
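The recurring "// With NAN inputs this is not the same operation as <" comments are why NGE/NGT/NLE/NLT/NEQ exist as separate opcodes instead of aliasing the ordered compares. Plain Python floats show the difference these handlers preserve:

a, b = float('nan'), 1.0
assert not (a >= b)  # NGE-style result is 1 for a NaN input...
assert not (a < b)   # ...while the LT-style result is 0, so NGE != LT under NaN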
@@ -9665,6 +10609,7 @@ def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- @@ -9672,9 +10617,11 @@ def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. @@ -9684,6 +10631,7 @@ def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- @@ -9691,9 +10639,11 @@ def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9702,6 +10652,7 @@ def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- @@ -9709,9 +10660,11 @@ def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. 
@@ -9721,6 +10674,7 @@ def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- @@ -9728,9 +10682,11 @@ def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9739,6 +10695,7 @@ def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- @@ -9746,9 +10703,11 @@ def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9757,6 +10716,7 @@ def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- @@ -9764,9 +10724,11 @@ def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
@@ -9776,6 +10738,7 @@ def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- @@ -9783,9 +10746,11 @@ def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9794,6 +10759,7 @@ def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- @@ -9801,9 +10767,11 @@ def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -9813,6 +10781,7 @@ def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- @@ -9820,9 +10789,11 @@ def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
@@ -9832,6 +10803,7 @@ def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- @@ -9839,9 +10811,11 @@ def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= # // D0 = VCC in VOPC encoding. @@ -9851,6 +10825,7 @@ def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- @@ -9858,9 +10833,11 @@ def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -9870,6 +10847,7 @@ def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- @@ -9877,9 +10855,11 @@ def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. 
# EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -9890,6 +10870,7 @@ def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- @@ -9897,9 +10878,11 @@ def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -9910,6 +10893,7 @@ def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- @@ -9917,9 +10901,11 @@ def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -9927,6 +10913,7 @@ def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -9934,24 +10921,29 @@ def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -9960,15 +10952,18 @@ def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -9977,15 +10972,18 @@ def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -9993,15 +10991,18 @@ def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input.
Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -10010,15 +11011,18 @@ def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10026,15 +11030,18 @@ def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -10042,15 +11049,18 @@ def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. 
@@ -10059,15 +11069,18 @@ def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
@@ -10076,15 +11089,18 @@ def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 >= S1.f32);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -10093,15 +11109,18 @@ def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 >= S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 <> S1.f32);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -10110,15 +11129,18 @@ def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 != S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = !(S0.f32 > S1.f32);
   # // With NAN inputs this is not the same operation as <=
@@ -10128,15 +11150,18 @@ def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 > S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f32 <= S1.f32);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -10145,15 +11170,18 @@ def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 <= S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f32 == S1.f32);
   # // With NAN inputs this is not the same operation as !=
@@ -10163,15 +11191,18 @@ def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 == S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f32 < S1.f32);
   # // With NAN inputs this is not the same operation as >=
@@ -10181,30 +11212,36 @@ def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f32 < S1.f32)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 1
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
@@ -10212,6 +11249,7 @@ def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -10219,9 +11257,11 @@ def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -10231,6 +11271,7 @@ def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32
   # --- end pseudocode ---
@@ -10238,9 +11279,11 @@ def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -10250,6 +11293,7 @@ def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32
   # --- end pseudocode ---
@@ -10257,9 +11301,11 @@ def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10268,6 +11314,7 @@ def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32
   # --- end pseudocode ---
@@ -10275,9 +11322,11 @@ def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32;
   # // D0 = VCC in VOPC encoding.
@@ -10287,6 +11336,7 @@ def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32
   # --- end pseudocode ---
@@ -10294,9 +11344,11 @@ def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10305,6 +11357,7 @@ def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32
   # --- end pseudocode ---
@@ -10312,9 +11365,11 @@ def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10323,6 +11378,7 @@ def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32
   # --- end pseudocode ---
@@ -10330,9 +11386,11 @@ def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
@@ -10342,6 +11400,7 @@ def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
   # --- end pseudocode ---
@@ -10349,9 +11408,11 @@ def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)));
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10360,6 +11421,7 @@ def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
   # --- end pseudocode ---
@@ -10367,9 +11429,11 @@ def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -10379,6 +11443,7 @@ def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32)
   # --- end pseudocode ---
@@ -10386,9 +11451,11 @@ def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -10398,6 +11465,7 @@ def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32)
   # --- end pseudocode ---
@@ -10405,9 +11473,11 @@ def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32);
   # // With NAN inputs this is not the same operation as <=
   # // D0 = VCC in VOPC encoding.
@@ -10417,6 +11487,7 @@ def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32)
   # --- end pseudocode ---
@@ -10424,9 +11495,11 @@ def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -10436,6 +11509,7 @@ def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32)
   # --- end pseudocode ---
@@ -10443,9 +11517,11 @@ def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32);
   # // With NAN inputs this is not the same operation as !=
@@ -10456,6 +11532,7 @@ def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32)
   # --- end pseudocode ---
@@ -10463,9 +11540,11 @@ def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32);
   # // With NAN inputs this is not the same operation as >=
@@ -10476,6 +11555,7 @@ def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32)
   # --- end pseudocode ---
@@ -10483,9 +11563,11 @@ def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
@@ -10493,6 +11575,7 @@ def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -10500,24 +11583,29 @@ def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 0
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f64 < S1.f64;
   # // D0 = VCC in VOPC encoding.
@@ -10526,15 +11614,18 @@ def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f64 < S1.f64
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f64 == S1.f64;
   # // D0 = VCC in VOPC encoding.
@@ -10543,15 +11634,18 @@ def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f64 == S1.f64
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f64 <= S1.f64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10559,15 +11653,18 @@ def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f64 <= S1.f64
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.f64 > S1.f64;
   # // D0 = VCC in VOPC encoding.
@@ -10576,15 +11673,18 @@ def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f64 > S1.f64
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f64 <> S1.f64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10592,15 +11692,18 @@ def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f64 != S1.f64
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f64 >= S1.f64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10608,15 +11711,18 @@ def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f64 >= S1.f64
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC
   # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64));
   # // D0 = VCC in VOPC encoding.
@@ -10625,15 +11731,18 @@ def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64));
   # // D0 = VCC in VOPC encoding.
@@ -10642,15 +11751,18 @@ def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64))
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f64 >= S1.f64);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -10659,15 +11771,18 @@ def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f64 >= S1.f64)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f64 <> S1.f64);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -10676,15 +11791,18 @@ def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f64 != S1.f64)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC or a scalar register.
   # D0.u64[laneId] = !(S0.f64 > S1.f64);
   # // With NAN inputs this is not the same operation as <=
@@ -10694,15 +11812,18 @@ def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f64 > S1.f64)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = !(S0.f64 <= S1.f64);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -10711,15 +11832,18 @@ def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f64 <= S1.f64)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f64 == S1.f64);
   # // With NAN inputs this is not the same operation as !=
@@ -10729,15 +11853,18 @@ def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f64 == S1.f64)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC
   # D0.u64[laneId] = !(S0.f64 < S1.f64);
   # // With NAN inputs this is not the same operation as >=
@@ -10747,30 +11874,36 @@ def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = not (S0.f64 < S1.f64)
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 1
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
@@ -10778,6 +11911,7 @@ def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 0
   # --- end pseudocode ---
@@ -10785,9 +11919,11 @@ def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64;
   # // D0 = VCC in VOPC encoding.
@@ -10797,6 +11933,7 @@ def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64
   # --- end pseudocode ---
@@ -10804,9 +11941,11 @@ def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64;
   # // D0 = VCC in VOPC encoding.
@@ -10816,6 +11955,7 @@ def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64
   # --- end pseudocode ---
@@ -10823,9 +11963,11 @@ def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10834,6 +11976,7 @@ def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64
   # --- end pseudocode ---
@@ -10841,9 +11984,11 @@ def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64;
   # // D0 = VCC in VOPC encoding.
@@ -10853,6 +11998,7 @@ def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64
   # --- end pseudocode ---
@@ -10860,9 +12006,11 @@ def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10871,6 +12019,7 @@ def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64
   # --- end pseudocode ---
@@ -10878,9 +12027,11 @@ def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10889,6 +12040,7 @@ def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64
   # --- end pseudocode ---
@@ -10896,9 +12048,11 @@ def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64));
   # // D0 = VCC in VOPC encoding.
@@ -10908,6 +12062,7 @@ def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64))
   # --- end pseudocode ---
@@ -10915,9 +12070,11 @@ def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64));
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -10926,6 +12083,7 @@ def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64))
   # --- end pseudocode ---
@@ -10933,9 +12091,11 @@ def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64);
   # // With NAN inputs this is not the same operation as <
   # // D0 = VCC in VOPC encoding.
@@ -10945,6 +12105,7 @@ def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64)
   # --- end pseudocode ---
@@ -10952,9 +12113,11 @@ def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64);
   # // With NAN inputs this is not the same operation as ==
   # // D0 = VCC in VOPC encoding.
@@ -10964,6 +12127,7 @@ def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64)
   # --- end pseudocode ---
@@ -10971,9 +12135,11 @@ def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64);
   # // With NAN inputs this is not the same operation as <=
   # // D0 = VCC in VOPC encoding.
@@ -10983,6 +12149,7 @@ def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64)
   # --- end pseudocode ---
@@ -10990,9 +12157,11 @@ def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64);
   # // With NAN inputs this is not the same operation as >
   # // D0 = VCC in VOPC encoding.
@@ -11002,6 +12171,7 @@ def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64)
   # --- end pseudocode ---
@@ -11009,9 +12179,11 @@ def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64);
   # // With NAN inputs this is not the same operation as !=
@@ -11022,6 +12194,7 @@ def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64)
   # --- end pseudocode ---
@@ -11029,9 +12202,11 @@ def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64);
   # // With NAN inputs this is not the same operation as >=
@@ -11042,6 +12217,7 @@ def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64)
   # --- end pseudocode ---
@@ -11049,9 +12225,11 @@ def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register.
   # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U;
   # // D0 = VCC in VOPC encoding.
@@ -11059,6 +12237,7 @@ def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   VCC = Reg(vcc)
   EXEC = Reg(exec_mask)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   EXEC.u64[laneId] = D0.u64[laneId] = 1
   # --- end pseudocode ---
@@ -11066,24 +12245,29 @@ def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOP3AOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -11092,15 +12276,18 @@ def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. @@ -11109,15 +12296,18 @@ def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11125,15 +12315,18 @@ def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -11142,15 +12335,18 @@ def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. @@ -11159,15 +12355,18 @@ def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11175,45 +12374,54 @@ def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. 
# D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -11222,15 +12430,18 @@ def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. @@ -11239,15 +12450,18 @@ def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11255,15 +12469,18 @@ def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -11272,15 +12489,18 @@ def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. @@ -11289,15 +12509,18 @@ def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11305,30 +12528,36 @@ def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. 
Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -11336,6 +12565,7 @@ def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -11343,9 +12573,11 @@ def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -11355,6 +12587,7 @@ def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -11362,9 +12595,11 @@ def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. @@ -11374,6 +12609,7 @@ def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -11381,9 +12617,11 @@ def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -11392,6 +12630,7 @@ def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -11399,9 +12638,11 @@ def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -11411,6 +12652,7 @@ def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -11418,9 +12660,11 @@ def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. @@ -11430,6 +12674,7 @@ def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -11437,9 +12682,11 @@ def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -11448,6 +12695,7 @@ def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -11455,9 +12703,11 @@ def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -11465,6 +12715,7 @@ def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -11472,9 +12723,11 @@ def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -11482,6 +12735,7 @@ def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -11489,9 +12743,11 @@ def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -11501,6 +12757,7 @@ def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -11508,9 +12765,11 @@ def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. @@ -11520,6 +12779,7 @@ def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -11527,9 +12787,11 @@ def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11538,6 +12800,7 @@ def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -11545,9 +12808,11 @@ def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -11557,6 +12822,7 @@ def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -11564,9 +12830,11 @@ def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. @@ -11576,6 +12844,7 @@ def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -11583,9 +12852,11 @@ def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11594,6 +12865,7 @@ def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -11601,9 +12873,11 @@ def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
@@ -11611,6 +12885,7 @@ def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -11618,24 +12893,29 @@ def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -11644,15 +12924,18 @@ def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -11661,15 +12944,18 @@ def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11677,15 +12963,18 @@ def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -11694,15 +12983,18 @@ def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -11711,15 +13003,18 @@ def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11727,45 +13022,54 @@ def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -11774,15 +13078,18 @@ def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -11791,15 +13098,18 @@ def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11807,15 +13117,18 @@ def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -11824,15 +13137,18 @@ def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -11841,15 +13157,18 @@ def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -11857,30 +13176,36 @@ def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
@@ -11888,6 +13213,7 @@ def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -11895,9 +13221,11 @@ def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -11907,6 +13235,7 @@ def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -11914,9 +13243,11 @@ def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. @@ -11926,6 +13257,7 @@ def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -11933,9 +13265,11 @@ def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -11944,6 +13278,7 @@ def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -11951,9 +13286,11 @@ def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -11963,6 +13300,7 @@ def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -11970,9 +13308,11 @@ def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. @@ -11982,6 +13322,7 @@ def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -11989,9 +13330,11 @@ def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -12000,6 +13343,7 @@ def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -12007,9 +13351,11 @@ def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -12017,6 +13363,7 @@ def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -12024,9 +13371,11 @@ def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -12034,6 +13383,7 @@ def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12041,9 +13391,11 @@ def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -12053,6 +13405,7 @@ def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -12060,9 +13413,11 @@ def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -12072,6 +13427,7 @@ def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -12079,9 +13435,11 @@ def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12090,6 +13448,7 @@ def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -12097,9 +13456,11 @@ def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -12109,6 +13470,7 @@ def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -12116,9 +13478,11 @@ def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -12128,6 +13492,7 @@ def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -12135,9 +13500,11 @@ def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12146,6 +13513,7 @@ def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -12153,9 +13521,11 @@ def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
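The compiled pseudocode line EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 is a chained single-bit write: one lane's compare result is deposited into both the destination mask and the 64-bit EXEC mask. A minimal sketch of the bit update Reg performs for such a write (set_lane is illustrative; the real indexing logic lives in extra/assembly/amd/pcode.py):

  def set_lane(mask: int, lane: int, cond: bool) -> int:
    # clear bit `lane`, then set it to cond -- one lane of a 64-bit wave mask
    return (mask & ~(1 << lane)) | (int(cond) << lane)

  assert set_lane(0b1111, lane=1, cond=False) == 0b1101
  assert set_lane(0b0000, lane=3, cond=True) == 0b1000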
@@ -12163,6 +13533,7 @@ def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -12170,24 +13541,29 @@ def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -12196,15 +13572,18 @@ def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. 
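Each handler reports its effects through a plain dict rather than mutating state: 'd0' and 'scc' are always present, 'd0_64' marks a 64-bit destination, 'vcc_lane' and 'exec' appear only when the value actually changed, and after this patch 'new_pc' is reported unconditionally. A sketch of how a caller might unpack one (the dict values here are made up and the consumer shape is an assumption; the real dispatch is in extra/assembly/amd/emu.py):

  lane, vcc, exec_mask, pc = 0, 0, 0xF, 0
  res = {'d0': 1, 'scc': 0, 'd0_64': True, 'vcc_lane': 1, 'new_pc': 0x20}
  if 'exec' in res: exec_mask = res['exec']  # CMPX family only
  if 'vcc_lane' in res: vcc = (vcc & ~(1 << lane)) | (res['vcc_lane'] << lane)
  if 'new_pc' in res: pc = res['new_pc']  # absolute byte address
  assert (vcc, pc) == (1, 0x20)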
@@ -12213,15 +13592,18 @@ def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12229,15 +13611,18 @@ def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -12246,15 +13631,18 @@ def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -12263,15 +13651,18 @@ def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12279,45 +13670,54 @@ def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. 
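The V_CMP_*_I64 handlers above compare S0.i64 against S1.i64, i.e. the 64-bit payload reinterpreted as signed; comparing the raw unsigned storage would order negative operands after positive ones. The reinterpretation in isolation (as_i64 is an illustrative helper, not from the patch):

  def as_i64(u: int) -> int:
    return u - (1 << 64) if u & (1 << 63) else u

  neg_one, one = 0xFFFFFFFFFFFFFFFF, 1
  assert not (neg_one < one)            # raw u64 ordering is wrong for i64
  assert as_i64(neg_one) < as_i64(one)  # signed ordering: -1 < 1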
@@ -12326,15 +13726,18 @@ def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -12343,15 +13746,18 @@ def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12359,15 +13765,18 @@ def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -12376,15 +13785,18 @@ def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -12393,15 +13805,18 @@ def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12409,30 +13824,36 @@ def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
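All of these compare handlers set result['d0_64'] = True because the destination is a full 64-bit lane mask; presumably the writeback then spreads the value across an aligned 32-bit register pair. A sketch under that assumption (write_d0 and the register-file shape are guesses; the actual writeback is in emu.py and may differ):

  def write_d0(regfile: list, idx: int, res: dict) -> None:
    regfile[idx] = res['d0'] & 0xFFFFFFFF
    if res.get('d0_64'): regfile[idx + 1] = (res['d0'] >> 32) & 0xFFFFFFFF

  regs = [0] * 4
  write_d0(regs, 0, {'d0': 0x100000002, 'd0_64': True})
  assert regs[:2] == [2, 1]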
@@ -12440,6 +13861,7 @@ def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12447,9 +13869,11 @@ def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -12459,6 +13883,7 @@ def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -12466,9 +13891,11 @@ def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -12478,6 +13905,7 @@ def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -12485,9 +13913,11 @@ def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -12496,6 +13926,7 @@ def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -12503,9 +13934,11 @@ def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -12515,6 +13948,7 @@ def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -12522,9 +13956,11 @@ def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -12534,6 +13970,7 @@ def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -12541,9 +13978,11 @@ def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -12552,6 +13991,7 @@ def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -12559,9 +13999,11 @@ def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. @@ -12569,6 +14011,7 @@ def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 # --- end pseudocode --- @@ -12576,9 +14019,11 @@ def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. @@ -12586,6 +14031,7 @@ def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12593,9 +14039,11 @@ def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -12605,6 +14053,7 @@ def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -12612,9 +14061,11 @@ def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -12624,6 +14075,7 @@ def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -12631,9 +14083,11 @@ def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12642,6 +14096,7 @@ def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -12649,9 +14104,11 @@ def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -12661,6 +14118,7 @@ def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -12668,9 +14126,11 @@ def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -12680,6 +14140,7 @@ def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -12687,9 +14148,11 @@ def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12698,6 +14161,7 @@ def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V VCC = Reg(vcc) EXEC = Reg(exec_mask) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -12705,9 +14169,11 @@ def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
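Note that pc is added as a trailing keyword with a default of 0, so handler signatures stay backward compatible: call sites that predate this patch keep working unchanged, and only the updated emulator loop needs to pass the current PC. The property in miniature (handler is a toy, not one of the generated functions):

  def handler(s0, s1, pc=0):
    return s0 + s1 + pc

  assert handler(1, 2) == 3        # legacy call site, no pc argument
  assert handler(1, 2, pc=4) == 7  # updated caller threads the PC through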
@@ -12725,6 +14191,7 @@ def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC = Reg(exec_mask) tmp = Reg(0) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- EXEC.u64[laneId] = D0.u64[laneId] = 1 addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) @@ -12741,9 +14208,11 @@ def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 if EXEC._val != exec_mask: result['exec'] = EXEC._val result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -12753,7 +14222,7 @@ def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'I; # if EXEC == 0x0LL then # lane = 0; @@ -12777,7 +14246,7 @@ def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12787,7 +14256,7 @@ def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -12798,7 +14267,7 @@ def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -12808,7 +14277,7 @@ def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12818,7 +14287,7 @@ def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12828,7 +14297,7 @@ def _VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12838,7 +14307,7 @@ def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12848,7 +14317,7 @@ def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -12858,7 +14327,7 @@ def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -12868,7 +14337,7 @@ def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -12878,7 +14347,7 @@ def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12888,7 +14357,7 @@ def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -12899,7 +14368,7 @@ def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return 
result -def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12909,7 +14378,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12919,7 +14388,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12929,7 +14398,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12939,7 +14408,7 @@ def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12949,7 +14418,7 @@ def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -12960,7 +14429,7 @@ def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -12971,7 +14440,7 @@ def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ 
-12987,7 +14456,7 @@ def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -13003,7 +14472,7 @@ def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -13019,7 +14488,7 @@ def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13029,7 +14498,7 @@ def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13039,7 +14508,7 @@ def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -13054,7 +14523,7 @@ def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -13069,7 +14538,7 @@ def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -13084,7 +14553,7 @@ def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13094,7 +14563,7 @@ def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13104,7 +14573,7 @@ def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -13114,7 +14583,7 @@ def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -13125,7 +14594,7 @@ def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13135,7 +14604,7 @@ def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -13146,7 +14615,7 @@ def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -13157,7 +14626,7 @@ def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -13167,7 +14636,7 @@ def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def 
_VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -13178,7 +14647,7 @@ def _VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -13188,7 +14657,7 @@ def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -13198,7 +14667,7 @@ def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -13208,7 +14677,7 @@ def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -13218,7 +14687,7 @@ def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -13238,7 +14707,7 @@ def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -13258,7 +14727,7 @@ def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -13278,7 +14747,7 @@ def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -13295,7 +14764,7 @@ def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -13313,7 +14782,7 @@ def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -13324,7 +14793,7 @@ def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -13341,7 +14810,7 @@ def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -13358,7 +14827,7 @@ def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -13369,7 +14838,7 @@ def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -13379,7 +14848,7 @@ def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -13389,7 +14858,7 @@ def _VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13399,7 +14868,7 @@ def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13409,7 +14878,7 @@ def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -13419,7 +14888,7 @@ def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13429,7 +14898,7 @@ def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13439,7 +14908,7 @@ def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13449,7 +14918,7 @@ def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -13459,7 +14928,7 @@ def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -13473,7 +14942,7 @@ def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13484,7 +14953,7 @@ def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13495,7 +14964,7 @@ def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13506,7 +14975,7 @@ def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, D0.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -13518,7 +14987,7 @@ def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -13529,7 +14998,7 @@ def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -13540,7 +15009,7 @@ def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -13551,7 +15020,7 @@ def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, 
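# Editor's sketch (not part of the patch): V_MUL_I32_I24 above multiplies the
# sign-extended low 24 bits of each operand. A minimal reference for that
# 24-bit sign extension, assuming raw 32-bit register values:
def sext24(x: int) -> int:
    x &= 0xffffff
    return x - 0x1000000 if x & 0x800000 else x

def mul_i32_i24(s0: int, s1: int) -> int:
    return (sext24(s0) * sext24(s1)) & 0xffffffff

assert mul_i32_i24(0x00ffffff, 2) == 0xfffffffe  # (-1) * 2 == -2 mod 2**32
assert mul_i32_i24(0xab000002, 3) == 6           # only the low 24 bits matter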
exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -13562,7 +15031,7 @@ def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -13573,7 +15042,7 @@ def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -13611,7 +15080,7 @@ def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then @@ -13653,7 +15122,7 @@ def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -13664,7 +15133,7 @@ def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -13675,7 +15144,7 @@ def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? 
S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -13686,7 +15155,7 @@ def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -13697,7 +15166,7 @@ def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13708,7 +15177,7 @@ def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13719,7 +15188,7 @@ def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13730,7 +15199,7 @@ def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13741,7 +15210,7 @@ def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13752,7 +15221,7 @@ def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13763,7 +15232,7 @@ def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -13781,7 +15250,7 @@ def _VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13792,7 +15261,7 @@ def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13803,7 +15272,7 @@ def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13814,7 +15283,7 @@ def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -13825,7 +15294,7 @@ def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + D0.f16; # if OPSEL.u4[3] then # D0 = { tmp.f16, D0[15 : 0] } @@ -13846,7 +15315,7 @@ def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13857,7 +15326,7 @@ def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13868,7 +15337,7 @@ def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S1.u16 - S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13879,7 +15348,7 @@ def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -13890,7 +15359,7 @@ def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13901,7 +15370,7 @@ def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13912,7 +15381,7 @@ def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -13923,7 +15392,7 @@ def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -13965,7 +15434,7 @@ def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then @@ -14003,7 +15472,7 @@ def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -14014,7 +15483,7 @@ def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -14025,7 +15494,7 @@ def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -14036,7 +15505,7 @@ def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -14047,7 +15516,7 @@ def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -14058,7 +15527,7 @@ def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14069,7 +15538,7 @@ def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14080,7 +15549,7 @@ def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14091,7 +15560,7 @@ def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': 
D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -14109,7 +15578,7 @@ def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); @@ -14127,7 +15596,7 @@ def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); @@ -14149,7 +15618,7 @@ def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); @@ -14179,7 +15648,7 @@ def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14190,7 +15659,7 @@ def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) S0 = Reg(s0) @@ -14203,7 +15672,7 @@ def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ 
-14214,7 +15683,7 @@ def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -14226,7 +15695,7 @@ def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14238,7 +15707,7 @@ def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -14287,7 +15756,7 @@ def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap S coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -14329,7 +15798,7 @@ def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap T coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -14364,7 +15833,7 @@ def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = 2.0 * cubemap major axis. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
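# Editor's sketch (not part of the patch): the V_CUBEID/SC/TC/MA handlers in
# this hunk all start from the same major-axis selection; only the value they
# derive from it differs. The face-ID rule alone, assuming float x/y/z inputs:
def cube_face_id(x: float, y: float, z: float) -> float:
    ax, ay, az = abs(x), abs(y), abs(z)
    if az >= ax and az >= ay: return 4.0 if z >= 0 else 5.0   # z is the major axis
    if ay >= ax:              return 2.0 if y >= 0 else 3.0   # y is the major axis
    return 0.0 if x >= 0 else 1.0                             # x is the major axis

assert cube_face_id(1.0, 0.5, -0.25) == 0.0
assert cube_face_id(0.0, 0.0, -2.0) == 5.0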
# // S0.f = x @@ -14392,7 +15861,7 @@ def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) S0 = Reg(s0) S1 = Reg(s1) @@ -14404,7 +15873,7 @@ def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) S0 = Reg(s0) @@ -14419,7 +15888,7 @@ def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) S0 = Reg(s0) S1 = Reg(s1) @@ -14431,7 +15900,7 @@ def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14443,7 +15912,7 @@ def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, S2.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -14456,7 +15925,7 @@ def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); @@ -14477,7 +15946,7 @@ def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -14489,7 +15958,7 @@ def 
_VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -14501,7 +15970,7 @@ def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14513,7 +15982,7 @@ def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -14525,7 +15994,7 @@ def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -14537,7 +16006,7 @@ def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -14549,7 +16018,7 @@ def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -14561,7 +16030,7 @@ def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -14573,7 +16042,7 @@ def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then @@ -14600,7 +16069,7 @@ def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then # D0.i32 = v_max_i32(S1.i32, S2.i32) # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then @@ -14623,7 +16092,7 @@ def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then # D0.u32 = v_max_u32(S1.u32, S2.u32) # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then @@ -14646,7 +16115,7 @@ def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -14670,7 +16139,7 @@ def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -14682,7 +16151,7 @@ def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); @@ -14702,7 +16171,7 @@ def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 S0 = Reg(s0) @@ -14715,7 +16184,7 @@ def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result 
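# Editor's sketch (not part of the patch): the V_MED3 handlers above pick the
# median by comparing against v_max3, and the SAD family accumulates byte-wise
# absolute differences. Minimal reference versions of both:
def med3_u32(a: int, b: int, c: int) -> int:
    return sorted((a, b, c))[1]  # the median is the middle of the sorted triple

def sad_u8(s0: int, s1: int, acc: int) -> int:
    for sh in (0, 8, 16, 24):  # |difference| summed over the four byte lanes
        acc += abs(((s0 >> sh) & 0xff) - ((s1 >> sh) & 0xff))
    return acc & 0xffffffff

assert med3_u32(7, 1, 4) == 4
assert sad_u8(0x01020304, 0x04030201, 0) == 8  # |1-4| + |2-3| + |3-2| + |4-1|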
-def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); # D0.u32 = tmp @@ -14732,7 +16201,7 @@ def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f32) ^ sign(S2.f32)); # if isNAN(64'F(S2.f32)) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) @@ -14785,7 +16254,7 @@ def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f64) ^ sign(S2.f64)); # if isNAN(S2.f64) then # D0.f64 = cvtToQuietNAN(S2.f64) @@ -14839,7 +16308,7 @@ def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) # else @@ -14861,7 +16330,7 @@ def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) # else @@ -14884,7 +16353,7 @@ def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += S1.u32[7 : 0] == 8'0U ? 
0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -14908,7 +16377,7 @@ def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -14930,7 +16399,7 @@ def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -14952,7 +16421,7 @@ def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); @@ -14973,7 +16442,7 @@ def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.f16 * S1.f16 + S2.f16; # if OPSEL.u4[3] then # D0 = { tmp.f16, D0[15 : 0] } @@ -14995,7 +16464,7 @@ def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u16 * S1.u16 + S2.u16; # if OPSEL.u4[3] then # D0 = { tmp.u16, D0[15 : 0] } @@ -15017,7 +16486,7 @@ def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i16 * S1.i16 + S2.i16; # if OPSEL.u4[3] then # D0 = { tmp.i16, D0[15 : 0] } @@ -15039,7 +16508,25 @@ def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return 
result -def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); + # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); + # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); + # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) + D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) + D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) + D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = fma(S0.f16, S1.f16, S2.f16); # if OPSEL.u4[3] then # D0 = { tmp.f16, D0[15 : 0] } @@ -15061,7 +16548,7 @@ def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # tmp = cvtToQuietNAN(64'F(S2.f16)) @@ -15116,7 +16603,7 @@ def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # byte = S1.u32[1 : 0]; # bit = byte.u32 * 8U; # D0.u32[bit + 7U : bit] = 32'U(f32_to_u8(S0.f32)) @@ -15131,7 +16618,7 @@ def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15143,7 +16630,7 @@ def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15155,7 +16642,7 @@ def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
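# Editor's sketch (not part of the patch): V_PERM_B32 above is the one newly
# added handler in this hunk; it routes bytes out of the 64-bit concatenation
# { S0, S1 } using the four selector bytes of S2. The in-range selector
# semantics only (0-7; the constant-producing selectors >= 8 are omitted here):
def perm_b32(s0: int, s1: int, s2: int) -> int:
    data = (s0 << 32) | s1        # { S0, S1 }: S1 is bytes 0-3, S0 is bytes 4-7
    out = 0
    for i in range(4):
        sel = (s2 >> (8 * i)) & 0xff
        out |= ((data >> (8 * (sel & 7))) & 0xff) << (8 * i)
    return out

assert perm_b32(0xaabbccdd, 0x11223344, 0x07060504) == 0xaabbccdd  # S0's bytes
assert perm_b32(0xaabbccdd, 0x11223344, 0x03020100) == 0x11223344  # S1's bytes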
src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15167,7 +16654,7 @@ def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -15179,7 +16666,7 @@ def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -15191,7 +16678,7 @@ def _VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -15203,7 +16690,7 @@ def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -15215,7 +16702,7 @@ def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -15227,7 +16714,7 @@ def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -15239,7 +16726,7 @@ def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then @@ -15266,7 
+16753,7 @@ def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then # D0.i16 = v_max_i16(S1.i16, S2.i16) # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then @@ -15289,7 +16776,7 @@ def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then # D0.u16 = v_max_u16(S1.u16, S2.u16) # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then @@ -15312,7 +16799,7 @@ def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15324,7 +16811,7 @@ def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15336,7 +16823,7 @@ def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15348,7 +16835,7 @@ def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15360,7 +16847,7 @@ def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15372,7 +16859,7 @@ def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG 
result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32 | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -15384,7 +16871,7 @@ def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 + S2.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -15396,7 +16883,7 @@ def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 + S2.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15408,7 +16895,7 @@ def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 * S1.i16 + S2.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15420,7 +16907,7 @@ def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -15432,7 +16919,7 @@ def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) @@ -15477,7 +16964,7 @@ def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15490,7 +16977,7 @@ def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): # tmp = 16'0U; # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 16'U(~S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 16'U(~S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); @@ -15518,7 +17005,7 @@ def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 32'U(~S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 32'U(~S0.b32 & ~S1.b32 & S2.b32) : 0U)); @@ -15546,7 +17033,7 @@ def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp0 = f32_to_fp8_scale(S0.f32, scale.u8); # tmp1 = f32_to_fp8_scale(S1.f32, scale.u8); @@ -15565,7 +17052,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp0 = f32_to_bf8_scale(S0.f32, scale.u8); # tmp1 = f32_to_bf8_scale(S1.f32, scale.u8); @@ -15584,7 +17071,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15601,7 +17088,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15618,7 +17105,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15639,7 +17126,7 @@ def 
_VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15660,7 +17147,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; @@ -15678,7 +17165,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; @@ -15696,7 +17183,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp0 = f32_to_fp4_scale(S0.f32, scale.u8); # tmp1 = f32_to_fp4_scale(S1.f32, scale.u8); @@ -15715,7 +17202,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8); @@ -15736,7 +17223,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; @@ -15757,7 +17244,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8); # tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8); @@ -15775,7 +17262,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8); # tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8); @@ -15793,7 +17280,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15810,7 +17297,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15827,7 +17314,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = bf16_to_fp8_scale(S0[15 : 0].bf16, scale.u8); # tmp1 = bf16_to_fp8_scale(S0[31 : 16].bf16, scale.u8); @@ -15845,7 +17332,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = bf16_to_bf8_scale(S0[15 : 0].bf16, scale.u8); # tmp1 = bf16_to_bf8_scale(S0[31 : 16].bf16, scale.u8); @@ -15863,7 +17350,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = bf16_to_fp8_sr_scale(S0.bf16, 
S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15880,7 +17367,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # tmp = bf16_to_bf8_sr_scale(S0.bf16, S1.u32, scale.u8); # dstbyte = OPSEL[3 : 2].i32 * 8; @@ -15897,7 +17384,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15918,7 +17405,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -15939,7 +17426,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; @@ -15958,7 +17445,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; @@ -15977,7 +17464,7 @@ def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8); # tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8); @@ -15995,7 +17482,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # tmp0 = bf16_to_fp4_scale(S0[15 : 0].bf16, scale.u8); # tmp1 = bf16_to_fp4_scale(S0[31 : 16].bf16, scale.u8); @@ -16013,7 +17500,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8); @@ -16034,7 +17521,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # tmp0 = bf16_to_fp4_sr_scale(S0[15 : 0].bf16, randomVal, scale.u8); @@ -16055,7 +17542,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_ result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; @@ -16076,7 +17563,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcbyte = OPSEL[1 : 0].i32 * 8; # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; @@ -16097,7 +17584,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # declare tmp : 192'B; # for pass in 0 : 15 do @@ -16121,7 +17608,7 @@ def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # 
declare tmp : 192'B; # for pass in 0 : 15 do @@ -16145,7 +17632,7 @@ def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16167,7 +17654,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16189,7 +17676,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 1024'B; # for pass in 0 : 31 do @@ -16209,7 +17696,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 1024'B; # for pass in 0 : 31 do @@ -16229,7 +17716,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 192'B; # for pass in 0 : 31 do @@ -16249,7 +17736,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 192'B; # for pass in 0 : 31 do @@ -16269,7 +17756,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): 
# scale = 32'U(exponent(S1.f32)); # declare tmp : 192'B; # for pass in 0 : 31 do @@ -16289,7 +17776,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16311,7 +17798,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16333,7 +17820,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exe result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16355,7 +17842,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S2.f32)); # randomVal = S1.u32; # declare tmp : 192'B; @@ -16377,7 +17864,7 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exe result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 512'B; # for pass in 0 : 31 do @@ -16397,7 +17884,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_ma result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # declare tmp : 512'B; # for pass in 0 : 31 do @@ -16417,7 +17904,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_m result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # scale = 32'U(exponent(S1.f32));
   # declare tmp : 512'B;
   # for pass in 0 : 31 do
@@ -16437,7 +17924,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_ma
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # scale = 32'U(exponent(S1.f32));
   # declare tmp : 512'B;
   # for pass in 0 : 31 do
@@ -16457,11 +17944,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_m
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
-  # if n <= -128 then
-  # elsif n >= 127 then
-  # else
-  # endif);
+def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # declare tmp : 16'B;
   # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32);
   # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32);
@@ -16472,12 +17955,6 @@ def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   D0 = Reg(d0)
   tmp = Reg(0)
   # --- compiled pseudocode ---
-  if n <= -128:
-    pass
-  elif n >= 127:
-    pass
-  else:
-    pass
   tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32)
   tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32)
   D0[15 : 0] = tmp
@@ -16485,11 +17962,7 @@ def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
-  # if n <= 0 then
-  # elsif n >= 255 then
-  # else
-  # endif);
+def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # declare tmp : 16'B;
   # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32);
   # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32);
@@ -16500,12 +17973,6 @@ def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   D0 = Reg(d0)
   tmp = Reg(0)
   # --- compiled pseudocode ---
-  if n <= 0:
-    pass
-  elif n >= 255:
-    pass
-  else:
-    pass
   tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32)
   tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32)
   D0[15 : 0] = tmp
@@ -16513,7 +17980,7 @@ def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # prev_mode = ROUND_MODE;
   # tmp[15 : 0].f16 = f32_to_f16(S0.f32);
   # tmp[31 : 16].f16 = f32_to_f16(S1.f32);
@@ -16528,7 +17995,7 @@ def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result = {'d0': d0, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # prev_mode = ROUND_MODE;
   # tmp[15 : 0].bf16 = f32_to_bf16(S0.f32);
   # tmp[31 : 16].bf16 = f32_to_bf16(S1.f32);
@@ -16543,7 +18010,7 @@ def
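A note on the V_ASHR_PK hunks just above: the deleted `if n ...: pass` branches referenced an undefined `n` (they read like scraped fragments of SAT8's own saturation definition), so the compiled body loses nothing by dropping them; the clamping lives inside SAT8 itself. A rough pure-Python model of what V_ASHR_PK_I8_I32 computes follows; `sat8` and `ashr_pk_i8_i32` are illustrative stand-ins, not the emulator's SAT8/Reg machinery.

def sat8(n: int) -> int:
  # saturate a signed value to [-128, 127], then reinterpret as an unsigned byte
  return max(-128, min(127, n)) & 0xff

def ashr_pk_i8_i32(s0: int, s1: int, s2: int) -> int:
  # arithmetic-shift each signed 32-bit source right by S2[4:0], saturate, pack into D0[15:0]
  to_i32 = lambda x: x - (1 << 32) if x & 0x80000000 else x
  sh = s2 & 0x1f
  return sat8(to_i32(s0) >> sh) | (sat8(to_i32(s1) >> sh) << 8)

assert ashr_pk_i8_i32(0xffff8000, 0x00007fff, 8) == 0x7f80  # -32768>>8 clamps to -128 (0x80), 32767>>8 to 127 (0x7f)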
_VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -16564,7 +18031,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # scale = 32'U(exponent(S1.f32)); # srcword = OPSEL[0].i32 * 16; # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; @@ -16585,7 +18052,7 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mas result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -16597,7 +18064,7 @@ def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -16609,7 +18076,7 @@ def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then # D0.f64 = cvtToQuietNAN(S0.f64) # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then @@ -16648,7 +18115,7 @@ def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then # D0.f64 = cvtToQuietNAN(S0.f64) # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then @@ -16691,7 +18158,7 @@ def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * 2.0 ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16703,7 +18170,7 @@ def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result 
-def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 * S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -16714,7 +18181,7 @@ def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -16725,7 +18192,7 @@ def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -16736,7 +18203,7 @@ def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * 2.0F ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16747,7 +18214,7 @@ def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # lane = S1.u32[5 : 0]; # // Lane select # D0.b32 = VGPR[lane][SRC0.u32] @@ -16761,7 +18228,7 @@ def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32; # for i in 0 : 31 do # tmp += S0[i].u32; @@ -16781,7 +18248,7 @@ def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 << S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -16793,7 +18260,7 @@ def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = 
Reg(s1) @@ -16805,7 +18272,7 @@ def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (S1.i64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -16817,7 +18284,7 @@ def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -16828,7 +18295,7 @@ def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); @@ -16842,7 +18309,7 @@ def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); @@ -16856,7 +18323,7 @@ def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -16871,7 +18338,7 @@ def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = u32_to_u16(S0.u32); # tmp[31 : 16].u16 = u32_to_u16(S1.u32); @@ -16885,7 +18352,7 @@ def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = i32_to_i16(S0.i32); # tmp[31 : 16].i16 = i32_to_i16(S1.i32); @@ -16899,7 +18366,7 @@ def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); @@ -16913,7 +18380,7 @@ def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); @@ -16927,7 +18394,7 @@ def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 + S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16938,7 +18405,7 @@ def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -16949,7 +18416,7 @@ def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 + S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -16960,7 +18427,7 @@ def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 - S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -16971,7 +18438,7 @@ def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = S1.f16; # D0[15 : 0].f16 = S0.f16 S0 = Reg(s0) @@ -16984,7 +18451,7 @@ def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then
   # // DX9 rules, 0.0 * x = 0.0
   # D0.f32 = 0.0F
@@ -17003,7 +18470,7 @@ def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = 32'F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -17015,7 +18482,7 @@ def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _VOP3AOp_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3AOp_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = 32'F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -17377,6 +18844,7 @@ VOP3AOp_FUNCTIONS = {
   VOP3AOp.V_MAD_LEGACY_F16: _VOP3AOp_V_MAD_LEGACY_F16,
   VOP3AOp.V_MAD_LEGACY_U16: _VOP3AOp_V_MAD_LEGACY_U16,
   VOP3AOp.V_MAD_LEGACY_I16: _VOP3AOp_V_MAD_LEGACY_I16,
+  VOP3AOp.V_PERM_B32: _VOP3AOp_V_PERM_B32,
   VOP3AOp.V_FMA_LEGACY_F16: _VOP3AOp_V_FMA_LEGACY_F16,
   VOP3AOp.V_DIV_FIXUP_LEGACY_F16: _VOP3AOp_V_DIV_FIXUP_LEGACY_F16,
   VOP3AOp.V_CVT_PKACCUM_U8_F32: _VOP3AOp_V_CVT_PKACCUM_U8_F32,
@@ -17490,7 +18958,7 @@ VOP3AOp_FUNCTIONS = {
   VOP3AOp.V_MAXIMUM3_F32: _VOP3AOp_V_MAXIMUM3_F32,
 }
 
-def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 64'U(S0.u32) + 64'U(S1.u32);
   # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32.
@@ -17510,7 +18978,7 @@ def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.u32 - S1.u32;
   # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32.
@@ -17530,7 +18998,7 @@ def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S1.u32 - S0.u32;
   # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32.
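For the VOP3B ops beginning here, the carry-out is one bit written into this lane's position of the 64-bit VCC mask, as the V_ADD_CO_U32 pseudocode above spells out. A minimal sketch of that semantics in plain Python (`add_co_u32` is an illustrative helper, not emulator code):

def add_co_u32(s0: int, s1: int, vcc: int, lane: int) -> tuple[int, int]:
  # 32-bit unsigned add; the carry-out replaces this lane's bit in VCC
  tmp = (s0 & 0xffffffff) + (s1 & 0xffffffff)
  carry = 1 if tmp >= 0x100000000 else 0
  return tmp & 0xffffffff, (vcc & ~(1 << lane)) | (carry << lane)

d0, vcc = add_co_u32(0xffffffff, 1, vcc=0, lane=3)
assert d0 == 0 and vcc == 0b1000  # the wraparound sets only lane 3's VCC bit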
@@ -17550,7 +19018,7 @@ def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64;
   # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32.
@@ -17570,7 +19038,7 @@ def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32;
   # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32.
@@ -17590,7 +19058,7 @@ def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32;
   # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U;
   # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32.
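V_ADD_CO_U32 and V_ADDC_CO_U32 chain through VCC to build wider adds: the first emits the low 32 bits plus a carry, the second consumes that carry for the high 32 bits (the SUBB/SUBBREV pair does the same with borrows). A sketch of a 64-bit add composed from the two semantics above, again with plain ints standing in for the emulator's Reg values:

def add64_via_carry_chain(a: int, b: int, lane: int = 0) -> int:
  # low word: V_ADD_CO_U32 semantics (carry-out into this lane's VCC bit)
  lo = (a & 0xffffffff) + (b & 0xffffffff)
  vcc = (1 << lane) if lo >= 0x100000000 else 0
  # high word: V_ADDC_CO_U32 semantics (carry-in from the same VCC bit)
  hi = (a >> 32) + (b >> 32) + ((vcc >> lane) & 1)
  return ((hi & 0xffffffff) << 32) | (lo & 0xffffffff)

assert add64_via_carry_chain(0x1_ffffffff, 0x1) == 0x2_00000000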
@@ -17610,7 +19078,7 @@ def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC = 0x0LL;
   # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then
   # D0.f32 = NAN.f32
@@ -17673,7 +19141,7 @@ def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # VCC = 0x0LL;
   # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then
   # D0.f64 = NAN.f64
@@ -17737,7 +19205,7 @@ def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['d0_64'] = True
   return result
 
-def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -17754,7 +19222,7 @@ def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['d1'] = D1._val & 1
   return result
 
-def _VOP3BOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3BOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64))
   S0 = Reg(s0)
   S1 = Reg(s1)
diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py
index 8ce42c1cc9..df32416e22 100644
--- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py
+++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py
@@ -5,7 +5,7 @@
 from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp
 from extra.assembly.amd.pcode import *
 
-def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.b32 = S0.b32
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -15,7 +15,7 @@ def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.b64 = S0.b64
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -26,7 +26,7 @@ def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if SCC then
   # D0.b32 = S0.b32
   # endif
@@
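The V_MAD_U64_U32 hunk above keeps the 65-bit bookkeeping: a 32x32-bit unsigned multiply plus a 64-bit addend, with the low 64 bits going to D0 and bit 64 to D1. A quick pure-Python check of that width arithmetic (`mad_u64_u32` is an illustrative helper, not emulator code):

def mad_u64_u32(s0: int, s1: int, s2: int) -> tuple[int, int]:
  # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64))
  full = (s0 & 0xffffffff) * (s1 & 0xffffffff) + (s2 & 0xffffffffffffffff)
  return full & 0xffffffffffffffff, (full >> 64) & 1

d0, d1 = mad_u64_u32(0xffffffff, 0xffffffff, 0xffffffffffffffff)
assert d1 == 1 and d0 == 0xfffffffe00000000  # the addend pushes the product past 2**64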
-40,7 +40,7 @@ def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b64 = S0.b64 # endif @@ -55,7 +55,7 @@ def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -65,7 +65,7 @@ def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[63 : 0] = S0.u64[0 : 63] S0 = Reg(s0) D0 = Reg(d0) @@ -76,7 +76,7 @@ def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -99,7 +99,7 @@ def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -122,7 +122,7 @@ def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -145,7 +145,7 @@ def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -168,7 +168,7 @@ def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the 
@@ -191,7 +191,7 @@ def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = -1;
   # // Set if all bits are the same
   # for i in 1 : 63 do
@@ -214,7 +214,7 @@ def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i8))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -224,7 +224,7 @@ def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i16))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -234,7 +234,7 @@ def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32[S0.u32[4 : 0]] = 1'0U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -244,7 +244,7 @@ def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[S0.u32[5 : 0]] = 1'0U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -255,7 +255,7 @@ def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32[S0.u32[4 : 0]] = 1'1U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -265,7 +265,7 @@ def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[S0.u32[5 : 0]] = 1'1U
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -276,7 +276,7 @@ def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.u32;
   # for i in 0 : 31 do
   # D0.u64[i * 2] = tmp[i];
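The sign-extension handlers above reduce to a plain two's-complement fold; a standalone Python model of S_SEXT_I32_I8 (hypothetical helper, not the generated code):

def s_sext_i32_i8(v: int) -> int:
  b = v & 0xff                                        # take the low byte
  return (b - 0x100 if b & 0x80 else b) & 0xffffffff  # sign-extend to 32 bits

assert s_sext_i32_i8(0x80) == 0xffffff80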
@@ -295,7 +295,7 @@ def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = S0.i32 < 0 ? -S0.i32 : S0.i32;
   # SCC = D0.i32 != 0
   S0 = Reg(s0)
@@ -308,7 +308,7 @@ def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0;
   # for i in 0 : 31 do
   # tmp += S0.u32[i] == 1'0U ? 1 : 0
@@ -329,7 +329,7 @@ def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0;
   # for i in 0 : 63 do
   # tmp += S0.u64[i] == 1'0U ? 1 : 0
@@ -351,7 +351,7 @@ def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0;
   # for i in 0 : 31 do
   # tmp += S0.u32[i] == 1'1U ? 1 : 0
@@ -372,7 +372,7 @@ def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0;
   # for i in 0 : 63 do
   # tmp += S0.u64[i] == 1'1U ? 1 : 0
@@ -394,7 +394,7 @@ def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0U;
   # for i in 0 : 7 do
   # tmp[i] = S0.u32[i * 4 +: 4] != 0U
@@ -415,7 +415,7 @@ def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0ULL;
   # for i in 0 : 15 do
   # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL
@@ -437,7 +437,7 @@ def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0U;
   # declare i : 6'U;
   # for i in 6'0U : 6'31U do
@@ -459,7 +459,7 @@ def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 0ULL;
   # declare i : 6'U;
   # for i in 6'0U : 6'63U do
@@ -482,7 +482,7 @@ def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ~S0.u32;
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -495,7 +495,7 @@ def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = ~S0.u64;
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -509,7 +509,7 @@ def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u32;
@@ -531,7 +531,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u64;
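S_WQM_B32 above expands each 4-bit quad to all-ones if any bit in the quad is set; a plain-Python model of that pseudocode (a sketch, not the generated handler):

def wqm_b32(v: int) -> int:
  out = 0
  for q in range(8):                  # 8 quads of 4 lanes each
    if (v >> (q * 4)) & 0xf: out |= 0xf << (q * 4)
  return out

assert wqm_b32(0x0102) == 0x0f0f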
@@ -554,7 +554,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set
   # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination
   # saveexec = EXEC.u32;
@@ -576,7 +576,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set
   # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination
   # saveexec = EXEC.u64;
@@ -599,7 +599,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u32;
@@ -621,7 +621,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u64;
@@ -644,7 +644,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u32;
@@ -666,7 +666,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u64;
@@ -689,7 +689,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u32;
@@ -711,7 +711,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u64;
@@ -734,7 +734,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u32;
@@ -756,7 +756,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask,
   # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar
   # saveexec = EXEC.u64;
@@ -779,7 +779,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into
   # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into
   # saveexec = EXEC.u32;
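All *_SAVEEXEC handlers above share one shape: save the old EXEC to the destination, write the bitwise result into EXEC, and set SCC iff it is nonzero. A minimal Python model (hypothetical helper, not the generated code):

def s_and_saveexec_b32(s0: int, exec_mask: int) -> tuple[int, int, int]:
  saveexec = exec_mask                            # old EXEC goes to the destination SGPR
  new_exec = s0 & exec_mask & 0xffffffff          # bitwise result becomes the new EXEC
  return saveexec, new_exec, int(new_exec != 0)   # (d0, exec, scc)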
@@ -801,7 +801,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into
   # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into
   # saveexec = EXEC.u64;
@@ -824,7 +824,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the
   # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the
   # saveexec = EXEC.u32;
@@ -846,7 +846,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the
   # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the
   # saveexec = EXEC.u64;
@@ -869,7 +869,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
   # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into
   # saveexec = EXEC.u32;
@@ -891,7 +891,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
   # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into
   # saveexec = EXEC.u64;
@@ -914,7 +914,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the
   # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the
   # saveexec = EXEC.u32;
@@ -936,7 +936,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the
   # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the
   # saveexec = EXEC.u64;
@@ -959,7 +959,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into
   # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
   # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -979,7 +979,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into
   # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
   # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -1000,7 +1000,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
   # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
   # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -1020,7 +1020,7 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result
 
-def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
   # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
   # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -1041,9 +1041,65 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   result['d0_64'] = True
   return result
 
-def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.i64 = PC + 4LL
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  D0.i64 = PC + 4
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # jump_addr = S0.i64;
+  # D0.i64 = PC + 4LL;
+  # PC = jump_addr.i64
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  jump_addr = S0.i64
+  D0.i64 = PC + 4
+  PC = Reg(jump_addr.i64)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # If SDST is VCC then VCCZ is undefined.
   VCC = Reg(vcc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
 
   # --- compiled pseudocode ---
   # --- end pseudocode ---
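Each new PC handler above reports its target through result['new_pc'], first folding the unsigned 64-bit register value into Python's signed range. A standalone model of that fold (hypothetical name):

def to_signed64(v: int) -> int:
  # mirrors the `_pc = PC._val if PC._val < 0x8000000000000000 else ...` lines above
  return v if v < 0x8000000000000000 else v - 0x10000000000000000

assert to_signed64(0xfffffffffffffffc) == -4   # wraps to a negative byte address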
@@ -1051,9 +1107,10 @@ def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # If SDST is VCC then VCCZ is undefined.
   VCC = Reg(vcc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
 
   # --- compiled pseudocode ---
   # --- end pseudocode ---
@@ -1061,7 +1118,7 @@ def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result
 
-def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = trunc(S0.f32);
   # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then
   # D0.f32 += 1.0F
@@ -1076,7 +1133,7 @@ def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = trunc(S0.f32);
   # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then
   # D0.f32 += -1.0F
@@ -1091,7 +1148,7 @@ def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = trunc(S0.f32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1101,7 +1158,7 @@ def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = floor(S0.f32 + 0.5F);
   # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then
   # D0.f32 -= 1.0F
@@ -1116,7 +1173,7 @@ def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = i32_to_f32(S0.i32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1126,7 +1183,7 @@ def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = u32_to_f32(S0.u32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1136,7 +1193,7 @@ def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = f32_to_i32(S0.f32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1146,7 +1203,7 @@ def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = f32_to_u32(S0.f32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1156,7 +1213,7 @@ def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = f32_to_f16(S0.f32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1166,7 +1223,7 @@ def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = f16_to_f32(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1176,7 +1233,7 @@ def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = f16_to_f32(S0[31 : 16].f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1186,7 +1243,7 @@ def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16);
   # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then
   # D0.f16 += 16'1.0
@@ -1201,7 +1258,7 @@ def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16);
   # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then
   # D0.f16 += -16'1.0
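The f16/f32 converts above can be spot-checked against numpy, assuming the generated f16_to_f32 follows IEEE binary16 (a sketch, not part of the test suite):

import numpy as np
# 0x3c00 is 1.0 in binary16; converting up to f32 must preserve it exactly
assert np.uint16(0x3c00).view(np.float16).astype(np.float32) == np.float32(1.0)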
@@ -1216,7 +1273,7 @@ def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1226,7 +1283,7 @@ def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = floor(S0.f16 + 16'0.5);
   # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then
   # D0.f16 -= 16'1.0
@@ -1296,6 +1353,10 @@ SOP1Op_FUNCTIONS = {
   SOP1Op.S_AND_NOT0_WREXEC_B64: _SOP1Op_S_AND_NOT0_WREXEC_B64,
   SOP1Op.S_AND_NOT1_WREXEC_B32: _SOP1Op_S_AND_NOT1_WREXEC_B32,
   SOP1Op.S_AND_NOT1_WREXEC_B64: _SOP1Op_S_AND_NOT1_WREXEC_B64,
+  SOP1Op.S_GETPC_B64: _SOP1Op_S_GETPC_B64,
+  SOP1Op.S_SETPC_B64: _SOP1Op_S_SETPC_B64,
+  SOP1Op.S_SWAPPC_B64: _SOP1Op_S_SWAPPC_B64,
+  SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64,
   SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32,
   SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64,
   SOP1Op.S_CEIL_F32: _SOP1Op_S_CEIL_F32,
@@ -1315,7 +1376,7 @@ SOP1Op_FUNCTIONS = {
   SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16,
 }
 
-def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 64'U(S0.u32) + 64'U(S1.u32);
   # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
@@ -1332,7 +1393,7 @@ def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.u32 - S1.u32;
   # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
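With the four new table entries registered above, an emulator loop can dispatch by opcode. A sketch (assumes Reg supports integer addition, as the GETPC body relies on):

fn = SOP1Op_FUNCTIONS[SOP1Op.S_GETPC_B64]
res = fn(0, 0, 0, 0, 0, 0, lane=0, exec_mask=1, literal=0, VGPR=None, _vars={}, pc=0x1000)
assert res['d0'] == 0x1004 and res['d0_64']   # D0 = PC + 4, 64-bit result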
@@ -1349,7 +1410,7 @@ def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.i32 + S1.i32;
   # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31]));
   # D0.i32 = tmp.i32
@@ -1366,7 +1427,7 @@ def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.i32 - S1.i32;
   # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31]));
   # D0.i32 = tmp.i32
@@ -1383,7 +1444,7 @@ def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64;
   # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
@@ -1400,7 +1461,7 @@ def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = S0.u32 - S1.u32 - SCC.u32;
   # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
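S_ADD_U32 and S_ADDC_U32 above chain through SCC, so a 64-bit add decomposes into two scalar ops; a plain-Python model (hypothetical helper):

def add64(lo0: int, hi0: int, lo1: int, hi1: int) -> tuple[int, int]:
  lo = lo0 + lo1
  scc = int(lo >= 0x100000000)        # carry-out of the low add
  hi = hi0 + hi1 + scc                # consumed as carry-in, like S_ADDC_U32
  return lo & 0xffffffff, hi & 0xffffffff

assert add64(0xffffffff, 0, 1, 0) == (0, 1)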
@@ -1417,7 +1478,7 @@ def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = S0.i32 - S1.i32;
   # if D0.i32 < 0 then
   # D0.i32 = -D0.i32
@@ -1436,7 +1497,7 @@ def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 << S1[4 : 0].u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1450,7 +1511,7 @@ def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S0.u64 << S1[5 : 0].u32);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1465,7 +1526,7 @@ def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 >> S1[4 : 0].u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1479,7 +1540,7 @@ def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S0.u64 >> S1[5 : 0].u32);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1494,7 +1555,7 @@ def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32);
   # SCC = D0.i32 != 0
   S0 = Reg(s0)
@@ -1508,7 +1569,7 @@ def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32);
   # SCC = D0.i64 != 0LL
   S0 = Reg(s0)
@@ -1523,7 +1584,7 @@ def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32);
   # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
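The shifts above mask the shift amount (5 bits for 32-bit ops, 6 bits for 64-bit); a model of S_LSHL_B64 returning (d0, scc) (hypothetical helper):

def s_lshl_b64(a: int, sh: int) -> tuple[int, int]:
  d = (a << (sh & 63)) & 0xffffffffffffffff
  return d, int(d != 0)

assert s_lshl_b64(1, 64) == (1, 1)   # shift amount wraps to 0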
@@ -1540,7 +1601,7 @@ def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32);
   # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
@@ -1557,7 +1618,7 @@ def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32);
   # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
@@ -1574,7 +1635,7 @@ def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32);
   # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
   # D0.u32 = tmp.u32
@@ -1591,7 +1652,7 @@ def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # SCC = S0.i32 < S1.i32;
   # D0.i32 = SCC ? S0.i32 : S1.i32
   S0 = Reg(s0)
@@ -1605,7 +1666,7 @@ def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # SCC = S0.u32 < S1.u32;
   # D0.u32 = SCC ? S0.u32 : S1.u32
   S0 = Reg(s0)
@@ -1619,7 +1680,7 @@ def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # SCC = S0.i32 >= S1.i32;
   # D0.i32 = SCC ? S0.i32 : S1.i32
   S0 = Reg(s0)
@@ -1633,7 +1694,7 @@ def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # SCC = S0.u32 >= S1.u32;
   # D0.u32 = SCC ? S0.u32 : S1.u32
   S0 = Reg(s0)
@@ -1647,7 +1708,7 @@ def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 & S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1661,7 +1722,7 @@ def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S0.u64 & S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1676,7 +1737,7 @@ def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 | S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1690,7 +1751,7 @@ def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S0.u64 | S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1705,7 +1766,7 @@ def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 ^ S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1719,7 +1780,7 @@ def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S0.u64 ^ S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1734,7 +1795,7 @@ def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ~(S0.u32 & S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
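Every scalar bitwise op above computes the result first and then derives SCC from it; a one-screen model of S_AND_B32 (hypothetical helper):

def s_and_b32(a: int, b: int) -> tuple[int, int]:
  d = (a & b) & 0xffffffff
  return d, int(d != 0)

assert s_and_b32(0xff00, 0x0ff0) == (0x0f00, 1)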
@@ -1748,7 +1809,7 @@ def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = ~(S0.u64 & S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1763,7 +1824,7 @@ def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ~(S0.u32 | S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1777,7 +1838,7 @@ def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = ~(S0.u64 | S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1792,7 +1853,7 @@ def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ~(S0.u32 ^ S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1806,7 +1867,7 @@ def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = ~(S0.u64 ^ S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1821,7 +1882,7 @@ def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 & ~S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1835,7 +1896,7 @@ def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S0.u64 & ~S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1850,7 +1911,7 @@ def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (S0.u32 | ~S1.u32);
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1864,7 +1925,7 @@ def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (S0.u64 | ~S1.u64);
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1879,7 +1940,7 @@ def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U));
   # SCC = D0.u32 != 0U
   S0 = Reg(s0)
@@ -1893,7 +1954,7 @@ def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1));
   # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32);
   # SCC = D0.i32 != 0
@@ -1910,7 +1971,7 @@ def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL));
   # SCC = D0.u64 != 0ULL
   S0 = Reg(s0)
@@ -1925,7 +1986,7 @@ def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL));
   # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32);
   # SCC = D0.i64 != 0LL
@@ -1943,7 +2004,7 @@ def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -1954,7 +2015,7 @@ def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32)
   S0 = Reg(s0)
   S1 = Reg(s1)
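S_BFE_I32 above extracts a field then sign-extends from its top bit; a standalone model of the offset/width decoding from S1 (hypothetical helper):

def s_bfe_i32(src: int, ctl: int) -> int:
  off, width = ctl & 0x1f, (ctl >> 16) & 0x7f    # S1[4:0] offset, S1[22:16] width
  tmp = (src >> off) & ((1 << width) - 1)
  if width and (tmp >> (width - 1)) & 1: tmp -= 1 << width   # signext_from_bit
  return tmp & 0xffffffff

assert s_bfe_i32(0xf0, (4 << 16) | 4) == 0xffffffff   # extracts 0xf, sign-extends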
@@ -1966,7 +2027,7 @@ def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result['d0_64'] = True
   return result
 
-def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = S0.i32 * S1.i32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -1977,7 +2038,7 @@ def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -1988,7 +2049,7 @@ def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U)
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -1999,7 +2060,7 @@ def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
-def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = SCC ? S0.u32 : S1.u32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -2011,7 +2072,7 @@ def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': SCC._val & 1}
   return result
 
-def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64 = SCC ? S0.u64 : S1.u64
   S0 = Reg(s0)
   S1 = Reg(s1)
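S_MUL_HI_U32 above widens to 64 bits and keeps the top half; a one-line model (hypothetical helper):

def s_mul_hi_u32(a: int, b: int) -> int:
  return ((a & 0xffffffff) * (b & 0xffffffff)) >> 32

assert s_mul_hi_u32(0xffffffff, 0xffffffff) == 0xfffffffe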
S0.u64 : S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2024,7 +2085,7 @@ def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2035,7 +2096,7 @@ def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2046,7 +2107,7 @@ def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2057,7 +2118,7 @@ def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2068,7 +2129,7 @@ def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2079,7 +2140,7 @@ def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2090,7 +2151,7 @@ def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -2150,7 +2211,7 @@ def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
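# --- illustrative sketch, not part of the patch: the four s_pack_*_b32_b16
# variants above all build { hi_half <- S1, lo_half <- S0 }, with L/H choosing
# which 16-bit half of each source is taken.
def sketch_s_pack(s0: int, s1: int, hi0: bool, hi1: bool) -> int:
  lo = (s0 >> 16 if hi0 else s0) & 0xffff
  hi = (s1 >> 16 if hi1 else s1) & 0xffff
  return (hi << 16) | lo

x, y = 0xaaaabbbb, 0xccccdddd
assert sketch_s_pack(x, y, False, False) == 0xddddbbbb  # s_pack_ll
assert sketch_s_pack(x, y, False, True)  == 0xccccbbbb  # s_pack_lh
assert sketch_s_pack(x, y, True,  True)  == 0xccccaaaa  # s_pack_hh
assert sketch_s_pack(x, y, True,  False) == 0xddddaaaa  # s_pack_hl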
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -2210,7 +2271,7 @@ def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2221,7 +2282,7 @@ def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2233,7 +2294,7 @@ def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2245,7 +2306,7 @@ def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2256,7 +2317,7 @@ def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -2271,7 +2332,7 @@ def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2282,7 +2343,7 @@ def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2293,7 +2354,7 @@ def _SOP2Op_S_SUB_F16(s0, 
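# --- illustrative sketch, not part of the patch: s_fmaak and s_fmamk above both
# consume the 32-bit literal SIMM32, but fmaak uses it as the addend and fmamk
# as the multiplier. Real hardware fuses the multiply-add into one rounding;
# this two-rounding Python version only illustrates the operand placement.
def sketch_s_fmaak_f32(s0: float, s1: float, k: float) -> float: return s0 * s1 + k
def sketch_s_fmamk_f32(s0: float, s1: float, k: float) -> float: return s0 * k + s1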
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -2353,7 +2414,7 @@ def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -2413,7 +2474,7 @@ def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2424,7 +2485,7 @@ def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -2505,7 +2566,7 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_FMAC_F16: _SOP2Op_S_FMAC_F16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2516,7 +2577,7 @@ def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2527,7 +2588,7 @@ def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2538,7 +2599,7 @@ def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 
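# --- illustrative sketch, not part of the patch: the s_min/s_max pseudocode
# above notes it "differs from IEEE" in that -0.0 compares strictly less than
# +0.0. NaN handling is elided here; only the signed-zero rule is shown.
import math
def sketch_s_min_f32(a: float, b: float) -> float:
  if a == 0.0 and b == 0.0:  # equal magnitude: order by sign bit
    return a if math.copysign(1.0, a) < math.copysign(1.0, b) else b
  return min(a, b)

assert math.copysign(1.0, sketch_s_min_f32(0.0, -0.0)) == -1.0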
>= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2549,7 +2610,7 @@ def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2560,7 +2621,7 @@ def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2571,7 +2632,7 @@ def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2582,7 +2643,7 @@ def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2593,7 +2654,7 @@ def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2604,7 +2665,7 @@ def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2615,7 +2676,7 @@ def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2626,7 +2687,7 @@ def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, 
pc=0): # SCC = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2637,7 +2698,7 @@ def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2648,7 +2709,7 @@ def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2659,7 +2720,7 @@ def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2670,7 +2731,7 @@ def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2681,7 +2742,7 @@ def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 == S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2692,7 +2753,7 @@ def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2703,7 +2764,7 @@ def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2714,7 +2775,7 @@ def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F16(s0, s1, 
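# --- illustrative sketch, not part of the patch: the s_bitcmp{0,1} handlers
# above test a single bit of S0, with S1 giving the bit index (masked to 5 bits
# for b32, 6 bits for b64); SCC reports whether the bit is clear or set.
def sketch_s_bitcmp1_b32(s0: int, s1: int) -> int:
  return (s0 >> (s1 & 0x1f)) & 1

assert sketch_s_bitcmp1_b32(0b1000, 3) == 1
assert sketch_s_bitcmp1_b32(0b1000, 35) == 1  # 35 & 0x1f == 3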
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2725,7 +2786,7 @@ def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 == S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2736,7 +2797,7 @@ def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 == S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2747,7 +2808,7 @@ def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2758,7 +2819,7 @@ def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2769,7 +2830,7 @@ def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2780,7 +2841,7 @@ def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2791,7 +2852,7 @@ def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2802,7 +2863,7 @@ def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2813,7 +2874,7 @@ def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2824,7 +2885,7 @@ def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2835,7 +2896,7 @@ def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -2846,7 +2907,7 @@ def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -2857,7 +2918,7 @@ def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -2868,7 +2929,7 @@ def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -2879,7 +2940,7 @@ def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -2891,7 +2952,7 @@ def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
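# --- illustrative sketch, not part of the patch: the "N*" compares above are
# not complements of the plain compares once NaN is involved, since every
# ordered comparison against NaN is false; s_cmp_o/s_cmp_u test orderedness.
import math
nan = float("nan")
def sketch_s_cmp_lt_f32(a: float, b: float) -> bool: return a < b
def sketch_s_cmp_nge_f32(a: float, b: float) -> bool: return not (a >= b)
def sketch_s_cmp_u_f32(a: float, b: float) -> bool: return math.isnan(a) or math.isnan(b)

assert sketch_s_cmp_lt_f32(nan, 1.0) is False
assert sketch_s_cmp_nge_f32(nan, 1.0) is True  # unordered: "not >=" holds, "<" does not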
VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -2903,7 +2964,7 @@ def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -2915,7 +2976,7 @@ def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -2927,7 +2988,7 @@ def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -2939,7 +3000,7 @@ def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -2951,7 +3012,7 @@ def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -2963,7 +3024,7 @@ def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -2975,7 +3036,7 @@ def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def 
_SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -2987,7 +3048,7 @@ def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -2999,7 +3060,7 @@ def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -3011,7 +3072,7 @@ def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -3072,7 +3133,7 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(SIMM16.i16)) D0 = Reg(d0) SIMM16 = Reg(literal) @@ -3082,7 +3143,7 @@ def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Do nothing - for use by tools only # --- compiled pseudocode --- @@ -3090,7 +3151,7 @@ def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': d0, 'scc': scc & 1} return result -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.i32 = 32'I(signext(SIMM16.i16)) # endif @@ -3104,7 +3165,7 @@ def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) 
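# --- illustrative sketch, not part of the patch: the signext(SIMM16.i16) idiom
# used throughout the SOPK handlers above reinterprets the 16-bit immediate as
# signed before widening it for the 32-bit comparison or add.
def sketch_signext16(simm16: int) -> int:
  simm16 &= 0xffff
  return simm16 - 0x10000 if simm16 & 0x8000 else simm16

assert sketch_signext16(0x7fff) == 32767
assert sketch_signext16(0xffff) == -1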
== signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3115,7 +3176,7 @@ def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) != signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3126,7 +3187,7 @@ def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) > signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3137,7 +3198,7 @@ def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) >= signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3148,7 +3209,7 @@ def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) < signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3159,7 +3220,7 @@ def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 64'I(S0.i32) <= signext(SIMM16.i16) S0 = Reg(s0) SCC = Reg(scc) @@ -3170,7 +3231,7 @@ def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3181,7 +3242,7 @@ def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 != 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3192,7 +3253,7 @@ def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3203,7 +3264,7 @@ def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3214,7 +3275,7 @@ def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3225,7 +3286,7 @@ def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <= 32'U(SIMM16.u16) S0 = Reg(s0) SCC = Reg(scc) @@ -3236,7 +3297,7 @@ def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # D0.i32 = 32'I(64'I(D0.i32) + signext(SIMM16.i16)); # SCC = ((tmp[31] == SIMM16.i16[15]) && (tmp[31] != D0.i32[31])); @@ -3252,7 +3313,7 @@ def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(64'I(D0.i32) * signext(SIMM16.i16)) D0 = Reg(d0) SIMM16 = Reg(literal) @@ -3262,6 +3323,22 @@ def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result +def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.i64 = PC + 4LL; + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + D0 = Reg(d0) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + D0.i64 = PC + 4 + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, SOPKOp.S_VERSION: _SOPKOp_S_VERSION, @@ -3280,9 +3357,10 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CMPK_LE_U32: _SOPKOp_S_CMPK_LE_U32, 
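# --- illustrative sketch, not part of the patch: the PC arithmetic in the new
# _SOPKOp_S_CALL_B64 above saves the address of the next instruction (the call
# is 4 bytes) and branches to a PC-relative target scaled from dwords to bytes.
def sketch_s_call_b64(pc: int, simm16: int) -> tuple:
  off = (simm16 & 0xffff) - 0x10000 if simm16 & 0x8000 else simm16 & 0xffff
  return pc + 4, pc + off * 4 + 4  # (return address saved to D0, new PC)

assert sketch_s_call_b64(0x100, 3) == (0x104, 0x110)
assert sketch_s_call_b64(0x100, 0xffff) == (0x104, 0x100)  # offset -1: the call itself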
SOPKOp.S_ADDK_I32: _SOPKOp_S_ADDK_I32, SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32, + SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # for i in 0U : SIMM16.u16[3 : 0].u32 do # endfor SIMM16 = Reg(literal) @@ -3293,7 +3371,7 @@ def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _var result = {'d0': d0, 'scc': scc & 1} return result -def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # instruction may be omitted. For wave64 the compiler may not know the status of the EXEC mask and hence # // 1 cycle delay here # // 2 cycles delay here @@ -3305,22 +3383,255 @@ def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // PC passed into trap handler points to S_TRAP itself, + # PC = TBA.i64; # // trap base address + PC = Reg(pc) # --- compiled pseudocode --- # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if SCC == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SCC = Reg(scc) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if SCC == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if SCC == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SCC = Reg(scc) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if SCC == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
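# --- illustrative sketch, not part of the patch: every s_cbranch_* handler
# above shares one pattern: a taken branch adds the sign-extended dword offset
# times 4 plus the 4-byte instruction size; fall-through just adds 4.
def sketch_cbranch_target(pc: int, simm16: int, taken: bool) -> int:
  off = (simm16 & 0xffff) - 0x10000 if simm16 & 0x8000 else simm16 & 0xffff
  return pc + off * 4 + 4 if taken else pc + 4

assert sketch_cbranch_target(0x40, 0xfffe, taken=True) == 0x3c   # -2 dwords
assert sketch_cbranch_target(0x40, 0xfffe, taken=False) == 0x44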
_vars, src0_idx=0, vdst_idx=0, pc=0): + # If VCCZ is 1 then jump to a constant offset relative to the current PC. + # if VCCZ.u1 == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + VCC = Reg(vcc) + SIMM16 = Reg(literal) + PC = Reg(pc) + VCCZ = Reg(1 if VCC._val == 0 else 0) + # --- compiled pseudocode --- + if VCCZ.u1 == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # If VCCZ is 0 then jump to a constant offset relative to the current PC. + # if VCCZ.u1 == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + VCC = Reg(vcc) + SIMM16 = Reg(literal) + PC = Reg(pc) + VCCZ = Reg(1 if VCC._val == 0 else 0) + # --- compiled pseudocode --- + if VCCZ.u1 == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if EXECZ.u1 == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + EXEC = Reg(exec_mask) + SIMM16 = Reg(literal) + PC = Reg(pc) + EXECZ = Reg(1 if EXEC._val == 0 else 0) + # --- compiled pseudocode --- + if EXECZ.u1 == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if EXECZ.u1 == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + EXEC = Reg(exec_mask) + SIMM16 = Reg(literal) + PC = Reg(pc) + EXECZ = Reg(1 if EXEC._val == 0 else 0) + # --- compiled pseudocode --- + if EXECZ.u1 == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if WAVE_STATUS.COND_DBG_SYS.u32 != 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 
0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if WAVE_STATUS.COND_DBG_USER.u32 != 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER): + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER): + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, SOPPOp.S_DELAY_ALU: _SOPPOp_S_DELAY_ALU, SOPPOp.S_TRAP: _SOPPOp_S_TRAP, + SOPPOp.S_BRANCH: _SOPPOp_S_BRANCH, + SOPPOp.S_CBRANCH_SCC0: _SOPPOp_S_CBRANCH_SCC0, + SOPPOp.S_CBRANCH_SCC1: _SOPPOp_S_CBRANCH_SCC1, + SOPPOp.S_CBRANCH_VCCZ: _SOPPOp_S_CBRANCH_VCCZ, + SOPPOp.S_CBRANCH_VCCNZ: _SOPPOp_S_CBRANCH_VCCNZ, + SOPPOp.S_CBRANCH_EXECZ: _SOPPOp_S_CBRANCH_EXECZ, + SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ, + SOPPOp.S_CBRANCH_CDBGSYS: _SOPPOp_S_CBRANCH_CDBGSYS, + SOPPOp.S_CBRANCH_CDBGUSER: _SOPPOp_S_CBRANCH_CDBGUSER, + SOPPOp.S_CBRANCH_CDBGSYS_OR_USER: _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER, + SOPPOp.S_CBRANCH_CDBGSYS_AND_USER: _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -3330,7 +3641,7 @@ def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
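# --- illustrative sketch, not part of the patch: the new_pc normalization
# repeated above unwraps the 64-bit unsigned PC value into a signed Python int,
# and VCCZ/EXECZ are simply "is the whole mask zero" flags derived on the fly.
def sketch_to_signed64(v: int) -> int:
  v &= (1 << 64) - 1
  return v if v < 1 << 63 else v - (1 << 64)

def sketch_vccz(vcc_mask: int) -> int:
  return 1 if vcc_mask == 0 else 0

assert sketch_to_signed64(2**64 - 4) == -4
assert sketch_vccz(0) == 1 and sketch_vccz(0b1010) == 0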
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -3373,7 +3684,7 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3383,7 +3694,7 @@ def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3394,7 +3705,7 @@ def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3404,7 +3715,7 @@ def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3414,7 +3725,7 @@ def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3424,7 +3735,7 @@ def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3434,7 +3745,7 @@ def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3444,7 +3755,7 @@ def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3454,7 +3765,7 @@ def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -3464,7 +3775,7 @@ def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -3474,7 +3785,7 @@ def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3484,7 +3795,7 @@ def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3495,7 +3806,7 @@ def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3505,7 +3816,7 @@ def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3515,7 +3826,7 @@ def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3525,7 +3836,7 @@ def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 
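# --- illustrative sketch, not part of the patch: the v_cvt_f32_ubyte{0..3}
# family above selects one byte of the 32-bit source and converts it to float.
def sketch_v_cvt_f32_ubyte(s0: int, n: int) -> float:
  return float((s0 >> (8 * n)) & 0xff)

assert sketch_v_cvt_f32_ubyte(0x04030201, 0) == 1.0
assert sketch_v_cvt_f32_ubyte(0x04030201, 3) == 4.0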
'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3535,7 +3846,7 @@ def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3545,7 +3856,7 @@ def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3556,7 +3867,7 @@ def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3567,7 +3878,7 @@ def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -3583,7 +3894,7 @@ def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -3599,7 +3910,7 @@ def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -3615,7 +3926,7 @@ def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -3625,7 +3936,7 @@ def 
_VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3635,7 +3946,7 @@ def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3645,7 +3956,7 @@ def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -3660,7 +3971,7 @@ def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -3675,7 +3986,7 @@ def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -3690,7 +4001,7 @@ def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3700,7 +4011,7 @@ def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3710,7 +4021,7 @@ def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -3720,7 +4031,7 @@ def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -3731,7 +4042,7 @@ def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3741,7 +4052,7 @@ def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -3752,7 +4063,7 @@ def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3763,7 +4074,7 @@ def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3773,7 +4084,7 @@ def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3784,7 +4095,7 @@ def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3794,7 +4105,7 @@ def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3804,7 +4115,7 @@ def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -3814,7 +4125,7 @@ def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -3824,7 +4135,7 @@ def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3844,7 +4155,7 @@ def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3864,7 +4175,7 @@ def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -3884,7 +4195,7 @@ def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -3901,7 +4212,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -3919,7 +4230,7 @@ def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def 
_VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3930,7 +4241,7 @@ def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -3947,7 +4258,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -3964,7 +4275,7 @@ def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 @@ -3978,7 +4289,7 @@ def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -3988,7 +4299,7 @@ def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -3998,7 +4309,7 @@ def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4008,7 +4319,7 @@ def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = 
f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4018,7 +4329,7 @@ def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -4028,7 +4339,7 @@ def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4038,7 +4349,7 @@ def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4048,7 +4359,7 @@ def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4058,7 +4369,7 @@ def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4068,7 +4379,7 @@ def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -4085,7 +4396,7 @@ def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -4102,7 +4413,7 @@ def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0):
+def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16);
   # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then
   # D0.f16 += -16'1.0
@@ -4117,7 +4428,7 @@ def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16);
   # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then
   # D0.f16 += 16'1.0
@@ -4132,7 +4443,7 @@ def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = trunc(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4142,7 +4453,7 @@ def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = floor(S0.f16 + 16'0.5);
   # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then
   # D0.f16 -= 16'1.0
@@ -4157,7 +4468,7 @@ def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 + -floor(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4167,7 +4478,7 @@ def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4177,7 +4488,7 @@ def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4187,7 +4498,17 @@ def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) }
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16))
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  return result
+
+def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i16 = f16_to_snorm(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4197,7 +4518,7 @@ def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = f16_to_unorm(S0.f16)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4207,7 +4528,7 @@ def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = D0.b32;
   # D0.b32 = S0.b32;
   # S0.b32 = tmp
@@ -4222,7 +4543,7 @@ def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # tmp = D0.b16;
   # D0.b16 = S0.b16;
   # S0.b16 = tmp
@@ -4237,7 +4558,7 @@ def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = ~S0.u16
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4247,7 +4568,7 @@ def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = 32'I(signext(S0.i16))
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4257,7 +4578,7 @@ def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0 = { 16'0, S0.u16 }
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -4338,6 +4659,7 @@ VOP1Op_FUNCTIONS = {
   VOP1Op.V_FRACT_F16: _VOP1Op_V_FRACT_F16,
   VOP1Op.V_SIN_F16: _VOP1Op_V_SIN_F16,
   VOP1Op.V_COS_F16: _VOP1Op_V_COS_F16,
+  VOP1Op.V_SAT_PK_U8_I16: _VOP1Op_V_SAT_PK_U8_I16,
   VOP1Op.V_CVT_NORM_I16_F16: _VOP1Op_V_CVT_NORM_I16_F16,
   VOP1Op.V_CVT_NORM_U16_F16: _VOP1Op_V_CVT_NORM_U16_F16,
   VOP1Op.V_SWAP_B32: _VOP1Op_V_SWAP_B32,
@@ -4347,7 +4669,7 @@ VOP1Op_FUNCTIONS = {
   VOP1Op.V_CVT_U32_U16: _VOP1Op_V_CVT_U32_U16,
 }

-def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc,
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4361,7 +4683,7 @@ def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -4379,7 +4701,7 @@ def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4390,7 +4712,7 @@ def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4401,7 +4723,7 @@ def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4412,7 +4734,7 @@ def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -4432,7 +4754,7 @@ def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -4451,7 +4773,7 @@ def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
_vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4462,7 +4784,7 @@ def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -4473,7 +4795,7 @@ def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -4484,7 +4806,7 @@ def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -4495,7 +4817,7 @@ def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -4506,7 +4828,7 @@ def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -4566,7 +4888,7 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -4626,7 +4948,7 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? 
S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -4637,7 +4959,7 @@ def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -4648,7 +4970,7 @@ def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4659,7 +4981,7 @@ def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4670,7 +4992,7 @@ def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4681,7 +5003,7 @@ def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4692,7 +5014,7 @@ def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4703,7 +5025,7 @@ def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4714,7 +5036,7 @@ def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_OR_B32(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4725,7 +5047,7 @@ def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4736,7 +5058,7 @@ def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4747,7 +5069,7 @@ def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -4767,7 +5089,7 @@ def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -4787,7 +5109,7 @@ def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
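For reference, a minimal Python model of the one-lane carry chain these V_*_CO_CI_U32 handlers implement; the function names are illustrative, not part of the patch:

# Minimal sketch: one lane's VCC carry bit for the carry-in/carry-out ops.
# s0, s1 are 32-bit unsigned values; carry_in/carry_out are 0 or 1.
def add_co_ci_u32(s0: int, s1: int, carry_in: int) -> tuple[int, int]:
  tmp = s0 + s1 + carry_in                         # at most 33 bits wide
  return tmp & 0xffffffff, int(tmp >= 0x100000000)

def sub_co_ci_u32(s0: int, s1: int, carry_in: int) -> tuple[int, int]:
  tmp = (s0 - s1 - carry_in) & 0xffffffff          # wrap like the hardware
  return tmp, int(s1 + carry_in > s0)              # unsigned borrow-out

assert add_co_ci_u32(0xffffffff, 0, 1) == (0, 1)
assert sub_co_ci_u32(0, 0, 1) == (0xffffffff, 1)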
@@ -4807,7 +5129,7 @@ def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4818,7 +5140,7 @@ def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4829,7 +5151,7 @@ def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4840,7 +5162,7 @@ def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4851,7 +5173,7 @@ def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4863,7 +5185,7 @@ def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -4875,7 +5197,7 @@ def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -4890,7 +5212,7 @@ def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4901,7 +5223,7 @@ def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4912,7 +5234,7 @@ def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4923,7 +5245,7 @@ def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -4934,7 +5256,7 @@ def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -4945,7 +5267,7 @@ def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -4957,7 +5279,7 @@ def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -4969,7 +5291,7 @@ def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -5029,7 +5351,7 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result 
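A quick sketch of the non-IEEE zero ordering the V_MAX_F16/V_MIN_F16 pseudocode encodes (+0.0 beats -0.0 for max); plain Python floats stand in for f16 here, and the IEEE/NaN path is omitted:

import math

# For max, +0.0 must win over -0.0; plain max() returns whichever comes first.
def v_max_zero_rule(a: float, b: float) -> float:
  if a == 0.0 and b == 0.0:  # covers both +0.0 and -0.0
    return 0.0 if math.copysign(1.0, a) > 0 or math.copysign(1.0, b) > 0 else -0.0
  return a if a >= b else b

assert math.copysign(1.0, v_max_zero_rule(-0.0, 0.0)) > 0
assert v_max_zero_rule(1.0, 2.0) == 2.0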
-def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # // Version of comparison where -0.0 < +0.0, differs from IEEE
   # if WAVE_MODE.IEEE then
   # if isSignalNAN(64'F(S0.f16)) then
@@ -5089,7 +5411,7 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -5100,7 +5422,7 @@ def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16);
   # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16)
   S0 = Reg(s0)
@@ -5162,22 +5484,25 @@ VOP2Op_FUNCTIONS = {
   VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16,
 }

-def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
   # D0.u64[laneId] = 1'0U;
   # // D0 = VCC in VOPC encoding.
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = 0
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 < S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5186,15 +5511,18 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 < S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.f16 == S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5203,15 +5531,18 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 == S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <= S1.f16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -5219,15 +5550,18 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 <= S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.f16 > S1.f16;
   # // D0 = VCC in VOPC encoding.
@@ -5236,15 +5570,18 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.f16 > S1.f16
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result

-def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.f16 <> S1.f16;
   # // D0 = VCC in VOPC encoding.
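The added `_pc` lines decode the 64-bit register value as a two's-complement integer, since Reg stores an unsigned bit pattern. A standalone sketch (function name illustrative):

def to_signed64(val: int) -> int:
  # Interpret a 64-bit unsigned bit pattern as a two's-complement integer.
  return val if val < 0x8000000000000000 else val - 0x10000000000000000

assert to_signed64(0xffffffffffffffff) == -1
assert to_signed64(0x100) == 0x100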
S0 = Reg(s0) @@ -5252,15 +5589,18 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5268,15 +5608,18 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -5285,15 +5628,18 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
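`D0.u64[laneId] = ...` writes a single bit of the 64-lane mask; on a bare integer the equivalent operation would be the following sketch (the real indexing lives in the Reg class):

def set_lane(mask: int, lane: int, bit: bool) -> int:
  # Set or clear bit `lane` in a 64-bit per-lane mask.
  return (mask | (1 << lane)) if bit else (mask & ~(1 << lane))

assert set_lane(0, 3, True) == 0b1000
assert set_lane(0b1000, 3, False) == 0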
@@ -5302,15 +5648,18 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5319,15 +5668,18 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5336,15 +5688,18 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
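As the comments note, these NGE/NLG/NGT/NLE variants differ from the plain comparisons only when a NaN is involved: every ordered compare against NaN is false, so its negation is true. A two-line demonstration:

nan = float("nan")
assert (not (nan >= 1.0)) is True   # NGE sets the lane bit for NaN inputs
assert (nan < 1.0) is False         # LT never does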
# D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -5354,15 +5709,18 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5371,15 +5729,18 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -5389,15 +5750,18 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -5407,45 +5771,54 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -5454,15 +5827,18 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -5471,15 +5847,18 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5487,15 +5866,18 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -5504,15 +5886,18 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5520,15 +5905,18 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5536,15 +5924,18 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -5553,15 +5944,18 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. 
@@ -5570,15 +5964,18 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5587,15 +5984,18 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5604,15 +6004,18 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -5622,15 +6025,18 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5639,15 +6045,18 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -5657,15 +6066,18 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -5675,45 +6087,54 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -5722,15 +6143,18 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -5739,15 +6163,18 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5755,15 +6182,18 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -5772,15 +6202,18 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5788,15 +6221,18 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5804,15 +6240,18 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -5821,15 +6260,18 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
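Every handler in this file now ends by converting PC from the unsigned 64-bit register domain back to a signed Python integer before reporting new_pc. The inline conditional is ordinary two's-complement reinterpretation; a named equivalent (to_signed64 is an illustrative helper, not an emulator function):

def to_signed64(u: int) -> int:
    # reinterpret a 64-bit unsigned value as signed two's complement
    return u if u < (1 << 63) else u - (1 << 64)

assert to_signed64(5) == 5
assert to_signed64(0xFFFFFFFFFFFFFFFF) == -1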
@@ -5838,15 +6280,18 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5855,15 +6300,18 @@ def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5872,15 +6320,18 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -5890,15 +6341,18 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5907,15 +6361,18 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -5925,15 +6382,18 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -5943,30 +6403,36 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -5975,15 +6441,18 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -5992,15 +6461,18 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6008,15 +6480,18 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -6025,15 +6500,18 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
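The .i16/.u16 accessors used by these 16-bit compares read only the low 16 bits of the source register; the signed variant additionally sign-extends. A sketch of that reinterpretation, assuming the usual two's-complement convention (reinterp_i16 is illustrative, not the Reg implementation):

def reinterp_i16(raw: int) -> int:
    v = raw & 0xFFFF                          # keep the low 16 bits
    return v if v < 0x8000 else v - 0x10000   # sign-extend

assert reinterp_i16(0x0005) == 5
assert reinterp_i16(0xFFFF) == -1
assert reinterp_i16(0xDEAD8000) == -32768     # upper bits ignored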
@@ -6042,15 +6520,18 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6058,15 +6539,18 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -6075,15 +6559,18 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -6092,15 +6579,18 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6108,15 +6598,18 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -6125,15 +6618,18 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -6142,15 +6638,18 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6158,30 +6657,36 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -6190,15 +6695,18 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -6207,15 +6715,18 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6223,15 +6734,18 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -6240,15 +6754,18 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -6257,15 +6774,18 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6273,45 +6793,54 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. 
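Each per-lane handler returns at most one bit of compare result (vcc_lane), and a wave-level driver is then responsible for assembling those bits into the 64-bit VCC/SGPR mask, which is why d0_64 is flagged. A hypothetical driver loop, with fold_lanes and its input being illustrative names rather than emulator API:

def fold_lanes(results):
    # results: iterable of (lane, bit) pairs from the per-lane handlers
    mask = 0
    for lane, bit in results:
        mask |= (bit & 1) << lane
    return mask

assert fold_lanes([(0, 1), (2, 1), (3, 0)]) == 0b101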
@@ -6320,15 +6849,18 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. @@ -6337,15 +6869,18 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6353,15 +6888,18 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -6370,15 +6908,18 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. @@ -6387,15 +6928,18 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6403,45 +6947,54 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -6450,15 +7003,18 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -6467,15 +7023,18 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6483,15 +7042,18 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -6500,15 +7062,18 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -6517,15 +7082,18 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6533,45 +7101,54 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. 
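# A minimal sketch, not from this patch, of why the I64 handlers above and the
# U64 handlers below differ only in the view taken of the same 64-bit payload;
# assumes the Reg .i64 view reinterprets the raw bits the way ctypes does
# (ctypes constructors mask to the target width).
import ctypes
raw_a, raw_b = 0xFFFFFFFFFFFFFFFF, 0x0000000000000001
assert not (raw_a < raw_b)                                        # u64 view: 2**64-1 > 1
assert ctypes.c_int64(raw_a).value < ctypes.c_int64(raw_b).value  # i64 view: -1 < 1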
# D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -6580,15 +7157,18 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -6597,15 +7177,18 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6613,15 +7196,18 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. @@ -6630,15 +7216,18 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -6647,15 +7236,18 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6663,30 +7255,36 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. 
Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6723,6 +7321,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -6741,9 +7340,11 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6780,6 +7381,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -6798,9 +7400,11 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
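# A rough sketch, not from this patch, of the CLASS-style compares in this
# region (V_CMP_CLASS_F16/F32/F64): S1 is a bitmask selecting float classes
# (bit 0 signaling NaN, bit 1 quiet NaN, per the comments above) and the lane
# result is the selected bit for S0's class. For f32 the quiet/signaling split
# is mantissa bit 22 (assumes IEEE-754 encoding; not the emulator's own code).
import math, struct
def f32_bits(x: float) -> int:
  return struct.unpack('<I', struct.pack('<f', x))[0]
def is_quiet_nan_f32(x: float) -> bool:
  # quiet NaNs have the most significant mantissa bit set; signaling NaNs don't
  return math.isnan(x) and bool(f32_bits(x) & 0x00400000)
assert is_quiet_nan_f32(float('nan'))            # Python's nan is a quiet NaN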
@@ -6837,6 +7441,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -6855,9 +7460,11 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -6868,7 +7475,7 @@ def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6881,7 +7488,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -6895,7 +7502,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6908,7 +7515,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6921,7 +7528,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6934,7 +7541,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6947,7 +7554,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6960,7 +7567,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6973,7 +7580,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -6987,7 +7594,7 @@ def 
_VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f16 <> S1.f16);
   # // With NAN inputs this is not the same operation as ==
   S0 = Reg(s0)
@@ -7001,7 +7608,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f16 > S1.f16);
   # // With NAN inputs this is not the same operation as <=
   S0 = Reg(s0)
@@ -7015,7 +7622,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f16 <= S1.f16);
   # // With NAN inputs this is not the same operation as >
   S0 = Reg(s0)
@@ -7029,7 +7636,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f16 == S1.f16);
   # // With NAN inputs this is not the same operation as !=
   S0 = Reg(s0)
@@ -7043,7 +7650,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f16 < S1.f16);
   # // With NAN inputs this is not the same operation as >=
   S0 = Reg(s0)
@@ -7057,7 +7664,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = 1'1U
   EXEC = Reg(exec_mask)
   laneId = lane
@@ -7068,7 +7675,7 @@ def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = 1'0U
   EXEC = Reg(exec_mask)
   laneId = lane
@@ -7079,7 +7686,7 @@ def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.f32 < S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -7092,7 +7699,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
   # EXEC.u64[laneId] = S0.f32 == S1.f32
   S0 = Reg(s0)
@@ -7106,7 +7713,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.f32 <= S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -7119,7 +7726,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.f32 > S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -7132,7 +7739,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.f32 <> S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -7145,7 +7752,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = S0.f32 >= S1.f32
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -7158,7 +7765,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32)))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -7171,7 +7778,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)))
   S0 = Reg(s0)
   S1 = Reg(s1)
@@ -7184,7 +7791,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f32 >= S1.f32);
   # // With NAN inputs this is not the same operation as <
   S0 = Reg(s0)
@@ -7198,7 +7805,7 @@ def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f32 <> S1.f32);
   # // With NAN inputs this is not the same operation as ==
   S0 = Reg(s0)
@@ -7212,7 +7819,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f32 > S1.f32);
   # // With NAN inputs this is not the same operation as <=
   S0 = Reg(s0)
@@ -7226,7 +7833,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f32 <= S1.f32);
   # // With NAN inputs this is not the same operation as >
   S0 = Reg(s0)
@@ -7240,7 +7847,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f32 == S1.f32);
   # // With NAN inputs this is not the same operation as !=
   S0 = Reg(s0)
@@ -7254,7 +7861,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # EXEC.u64[laneId] = !(S0.f32 < S1.f32);
   # // With NAN inputs this is not the same operation as >=
   S0 = Reg(s0)
@@
-7268,7 +7875,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7279,7 +7886,7 @@ def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7290,7 +7897,7 @@ def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7303,7 +7910,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -7317,7 +7924,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7330,7 +7937,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7343,7 +7950,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7356,7 +7963,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7369,7 +7976,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7382,7 +7989,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7395,7 +8002,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -7409,7 +8016,7 @@ def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7423,7 +8030,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7437,7 +8044,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7451,7 +8058,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7465,7 +8072,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7479,7 +8086,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7490,7 +8097,7 @@ def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7503,7 +8110,7 @@ def 
_VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -7517,7 +8124,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7530,7 +8137,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7543,7 +8150,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7556,7 +8163,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7569,7 +8176,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7582,7 +8189,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -7596,7 +8203,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7609,7 +8216,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7622,7 +8229,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7635,7 +8242,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7648,7 +8255,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7659,7 +8266,7 @@ def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7672,7 +8279,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -7686,7 +8293,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7699,7 +8306,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7712,7 +8319,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7725,7 +8332,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7738,7 +8345,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7749,7 +8356,7 @@ def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7760,7 +8367,7 @@ def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7773,7 +8380,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val 
return result -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 S0 = Reg(s0) @@ -7787,7 +8394,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7800,7 +8407,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7813,7 +8420,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7826,7 +8433,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7839,7 +8446,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7850,7 +8457,7 @@ def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7861,7 +8468,7 @@ def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): 
+def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 < S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7874,7 +8481,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 S0 = Reg(s0) @@ -7888,7 +8495,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7901,7 +8508,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 > S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7914,7 +8521,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <> S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7927,7 +8534,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 >= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7940,7 +8547,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -7951,7 +8558,7 @@ def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # 
EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -7962,7 +8569,7 @@ def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 < S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7975,7 +8582,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 S0 = Reg(s0) @@ -7989,7 +8596,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8002,7 +8609,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 > S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8015,7 +8622,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8028,7 +8635,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 >= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -8041,7 +8648,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -8052,7 +8659,7 @@ def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -8105,7 +8712,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -8158,7 +8765,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -8211,7 +8818,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -8221,7 +8828,7 @@ def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -8264,7 +8871,7 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8274,7 +8881,7 @@ def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8285,7 +8892,7 @@ def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8295,7 +8902,7 @@ def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8305,7 +8912,7 @@ def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8315,7 +8922,7 @@ def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8325,7 +8932,7 @@ def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8335,7 +8942,7 @@ def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8345,7 +8952,7 @@ def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -8355,7 +8962,7 @@ def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -8365,7 +8972,7 @@ def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result 
= {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8375,7 +8982,7 @@ def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8386,7 +8993,7 @@ def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8396,7 +9003,7 @@ def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8406,7 +9013,7 @@ def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8416,7 +9023,7 @@ def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8426,7 +9033,7 @@ def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8436,7 +9043,7 @@ def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = 
Reg(d0) @@ -8447,7 +9054,7 @@ def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8458,7 +9065,7 @@ def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -8474,7 +9081,7 @@ def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -8490,7 +9097,7 @@ def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -8506,7 +9113,7 @@ def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -8516,7 +9123,7 @@ def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8526,7 +9133,7 @@ def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8536,7 +9143,7 @@ def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, 
pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -8551,7 +9158,7 @@ def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -8566,7 +9173,7 @@ def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -8581,7 +9188,7 @@ def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8591,7 +9198,7 @@ def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8601,7 +9208,7 @@ def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -8611,7 +9218,7 @@ def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -8622,7 +9229,7 @@ def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8632,7 +9239,7 @@ def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -8643,7 +9250,7 @@ def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8654,7 +9261,7 @@ def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8664,7 +9271,7 @@ def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8675,7 +9282,7 @@ def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8685,7 +9292,7 @@ def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8695,7 +9302,7 @@ def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -8705,7 +9312,7 @@ def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -8715,7 +9322,7 @@ def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8735,7 +9342,7 @@ def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8755,7 +9362,7 @@ def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -8775,7 +9382,7 @@ def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -8792,7 +9399,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -8810,7 +9417,7 @@ def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8821,7 +9428,7 @@ def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -8838,7 +9445,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || 
isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -8855,7 +9462,7 @@ def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 @@ -8869,7 +9476,7 @@ def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -8879,7 +9486,7 @@ def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -8889,7 +9496,7 @@ def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8899,7 +9506,7 @@ def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8909,7 +9516,7 @@ def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -8919,7 +9526,7 @@ def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8929,7 +9536,7 @@ def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8939,7 +9546,7 @@ def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8949,7 +9556,7 @@ def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8959,7 +9566,7 @@ def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -8976,7 +9583,7 @@ def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -8993,7 +9600,7 @@ def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -9008,7 +9615,7 @@ def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -9023,7 +9630,7 @@ def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9033,7 +9640,7 @@ def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -9048,7 +9655,7 @@ def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9058,7 +9665,7 @@ def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -9068,7 +9675,7 @@ def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -9078,7 +9685,17 @@ def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) } + S0 = Reg(s0) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16)) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_snorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9088,7 +9705,7 @@ def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_unorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -9098,7 +9715,7 @@ def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = ~S0.u16 S0 = 
Reg(s0) D0 = Reg(d0) @@ -9108,7 +9725,7 @@ def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -9118,7 +9735,7 @@ def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { 16'0, S0.u16 } S0 = Reg(s0) D0 = Reg(d0) @@ -9128,7 +9745,7 @@ def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9142,7 +9759,7 @@ def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9153,7 +9770,7 @@ def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9164,7 +9781,7 @@ def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9175,7 +9792,7 @@ def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -9195,7 +9812,7 @@ def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -9214,7 +9831,7 @@ def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9225,7 +9842,7 @@ def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -9236,7 +9853,7 @@ def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -9247,7 +9864,7 @@ def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -9258,7 +9875,7 @@ def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -9269,7 +9886,7 @@ def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f32)) then @@ -9329,7 +9946,7 @@ def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE 
then # if isSignalNAN(64'F(S0.f32)) then @@ -9389,7 +10006,7 @@ def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9400,7 +10017,7 @@ def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9411,7 +10028,7 @@ def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9422,7 +10039,7 @@ def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9433,7 +10050,7 @@ def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9444,7 +10061,7 @@ def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9455,7 +10072,7 @@ def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9466,7 +10083,7 @@ def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9477,7 +10094,7 @@ def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9488,7 +10105,7 @@ def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9499,7 +10116,7 @@ def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9510,7 +10127,7 @@ def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9521,7 +10138,7 @@ def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9532,7 +10149,7 @@ def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9543,7 +10160,7 @@ def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -9554,7 +10171,7 @@ def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -9569,7 +10186,7 @@ def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9580,7 +10197,7 @@ def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9591,7 +10208,7 @@ def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9602,7 +10219,7 @@ def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9613,7 +10230,7 @@ def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -9624,7 +10241,7 @@ def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -9684,7 +10301,7 @@ def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(64'F(S0.f16)) then @@ -9744,7 +10361,7 @@ def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -9755,7 +10372,7 @@ def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -9775,7 +10392,7 @@ def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9787,7 +10404,7 @@ def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # 
D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9799,7 +10416,7 @@ def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9848,7 +10465,7 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap S coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9890,7 +10507,7 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap T coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9925,7 +10542,7 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = 2.0 * cubemap major axis. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
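# A minimal standalone model of the 24-bit mad semantics above (V_MAD_I32_I24 /
# V_MAD_U32_U24): only the low 24 bits of each multiplicand participate, and the
# 32-bit result wraps. Helper names here are illustrative, not the emulator's own.
def mad_u32_u24(s0: int, s1: int, s2: int) -> int:
    return ((s0 & 0xffffff) * (s1 & 0xffffff) + s2) & 0xffffffff

def mad_i32_i24(s0: int, s1: int, s2: int) -> int:
    def sext24(v: int) -> int:
        v &= 0xffffff
        return v - 0x1000000 if v & 0x800000 else v
    return (sext24(s0) * sext24(s1) + s2) & 0xffffffff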
# // S0.f = x @@ -9953,7 +10570,7 @@ def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) S0 = Reg(s0) S1 = Reg(s1) @@ -9965,7 +10582,7 @@ def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) S0 = Reg(s0) @@ -9980,7 +10597,7 @@ def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) S0 = Reg(s0) S1 = Reg(s1) @@ -9992,7 +10609,7 @@ def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10004,7 +10621,7 @@ def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, S2.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -10017,7 +10634,7 @@ def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); @@ -10038,7 +10655,7 @@ def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0].u32) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -10050,7 +10667,7 @@ def 
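# A compact sketch of the bit-field extract semantics above, assuming that
# signext_from_bit() sign-extends from the top bit of the width-wide field:
def bfe_u32(s0: int, s1: int, s2: int) -> int:
    off, width = s1 & 0x1f, s2 & 0x1f
    return (s0 >> off) & ((1 << width) - 1)

def bfe_i32(s0: int, s1: int, s2: int) -> int:
    tmp, width = bfe_u32(s0, s1, s2), s2 & 0x1f
    if width and tmp & (1 << (width - 1)): tmp -= 1 << width  # sign-extend the field
    return tmp & 0xffffffff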
_VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0].u32 * 8U)) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -10062,7 +10679,7 @@ def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || # isNAN(64'F(S2.f32))) then # D0.f32 = -MAX_FLOAT_F32 @@ -10082,7 +10699,7 @@ def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10094,7 +10711,7 @@ def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10106,7 +10723,7 @@ def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10118,7 +10735,7 @@ def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10130,7 +10747,7 @@ def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10142,7 +10759,7 @@ def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 
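# The alignbit/alignbyte pattern above funnels the 64-bit concatenation { S0, S1 }
# through a right shift; a minimal model (S0 supplies the high 32 bits):
def alignbit_b32(s0: int, s1: int, s2: int) -> int:
    data = ((s0 & 0xffffffff) << 32) | (s1 & 0xffffffff)
    return (data >> (s2 & 0x1f)) & 0xffffffff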
'scc': scc & 1} return result -def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10154,7 +10771,7 @@ def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then @@ -10181,7 +10798,7 @@ def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then # D0.i32 = v_max_i32(S1.i32, S2.i32) # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then @@ -10204,7 +10821,7 @@ def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then # D0.u32 = v_max_u32(S1.u32, S2.u32) # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then @@ -10227,7 +10844,7 @@ def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -10251,7 +10868,7 @@ def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10263,7 +10880,7 @@ def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); @@ -10283,7 +10900,7 @@ def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
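# med3 picks whichever input is neither the min nor the max; for the integer
# variants that is just the middle of a sort. The f32/f16 variants add the NaN
# rule shown above (fall back to min3 when any input is NaN), omitted here.
def med3_u32(a: int, b: int, c: int) -> int:
    return sorted((a, b, c))[1]

# v_sad_u8 accumulates per-byte absolute differences into S2, wrapping at 32 bits:
def sad_u8(s0: int, s1: int, s2: int) -> int:
    acc = s2
    for sh in (0, 8, 16, 24):
        acc += abs(((s0 >> sh) & 0xff) - ((s1 >> sh) & 0xff))
    return acc & 0xffffffff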
VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 S0 = Reg(s0) @@ -10296,7 +10913,7 @@ def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); # D0.u32 = tmp @@ -10313,7 +10930,7 @@ def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f32) ^ sign(S2.f32)); # if isNAN(64'F(S2.f32)) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) @@ -10366,7 +10983,7 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f64) ^ sign(S2.f64)); # if isNAN(S2.f64) then # D0.f64 = cvtToQuietNAN(S2.f64) @@ -10420,7 +11037,7 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) # else @@ -10442,7 +11059,7 @@ def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) # else @@ -10465,7 +11082,7 @@ def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += S1.u32[7 : 0] == 8'0U ? 
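# v_div_fmas applies a 2**32 (2**64 for the f64 variant) post-scale to the fma
# result when this lane's VCC bit is set; a rough model that, unlike hardware,
# does not preserve single-rounding fma fusion:
import math
def div_fmas_f32(s0: float, s1: float, s2: float, vcc_lane: int) -> float:
    r = s0 * s1 + s2  # hardware fuses this multiply-add in one rounding step
    return math.ldexp(r, 32) if vcc_lane else r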
0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -10489,7 +11106,7 @@ def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -10511,7 +11128,7 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -10533,7 +11150,7 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); @@ -10554,7 +11171,7 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10566,7 +11183,7 @@ def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 + S2.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -10578,7 +11195,25 @@ def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); + # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); + # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); + # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- 
compiled pseudocode --- + D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) + D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) + D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) + D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10590,7 +11225,7 @@ def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10602,7 +11237,7 @@ def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10614,7 +11249,7 @@ def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10626,7 +11261,7 @@ def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10638,7 +11273,7 @@ def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -10650,7 +11285,7 @@ def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10662,7 +11297,7 @@ def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc 
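# A rough standalone model of the BYTE_PERMUTE helper that V_PERM_B32 leans on,
# covering only selector values 0-7 (plain byte selects). The ISA also defines
# constant and sign-replication selectors above 7, which this sketch omits.
def byte_permute(data: int, sel: int) -> int:
    assert 0 <= sel <= 7, "constant/sign selectors (8+) not modeled here"
    return (data >> (8 * sel)) & 0xff

def perm_b32(s0: int, s1: int, s2: int) -> int:
    data = ((s0 & 0xffffffff) << 32) | (s1 & 0xffffffff)  # { S0, S1 }, S1 in the low bytes
    out = 0
    for i in range(4):
        out |= byte_permute(data, (s2 >> (8 * i)) & 0xff) << (8 * i)
    return out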
& 1} return result -def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10674,7 +11309,7 @@ def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -10686,7 +11321,7 @@ def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10698,7 +11333,7 @@ def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then @@ -10725,7 +11360,7 @@ def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then # D0.i16 = v_max_i16(S1.i16, S2.i16) # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then @@ -10748,7 +11383,7 @@ def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then # D0.u16 = v_max_u16(S1.u16, S2.u16) # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then @@ -10771,7 +11406,7 @@ def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 * S1.i16 + S2.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -10783,7 +11418,7 @@ def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} 
return result -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) @@ -10828,7 +11463,7 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10840,7 +11475,7 @@ def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10852,7 +11487,7 @@ def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10864,7 +11499,7 @@ def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32 | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10876,7 +11511,7 @@ def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10888,7 +11523,7 @@ def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -10900,7 +11535,7 @@ def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
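# The fused shift/add ALU ops above compose in one step what would otherwise be
# two VALU instructions; minimal models with 32-bit wrap-around:
def add3_u32(s0: int, s1: int, s2: int) -> int:
    return (s0 + s1 + s2) & 0xffffffff

def lshl_or_b32(s0: int, s1: int, s2: int) -> int:
    return ((s0 << (s1 & 0x1f)) | s2) & 0xffffffff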
src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -10914,7 +11549,7 @@ def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10926,7 +11561,7 @@ def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10938,7 +11573,7 @@ def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10950,7 +11585,7 @@ def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10962,7 +11597,7 @@ def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10974,7 +11609,7 @@ def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10986,7 +11621,7 @@ def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10998,7 +11633,7 @@ def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -11010,7 +11645,7 @@ def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f16; # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; @@ -11029,7 +11664,7 @@ def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.bf16; # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; @@ -11048,7 +11683,7 @@ def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11059,7 +11694,7 @@ def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11070,7 +11705,7 @@ def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11081,7 +11716,7 @@ def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); @@ -11095,7 +11730,7 @@ def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
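# v_dot2_f16_f16 sums two f16 lane products into an f16 accumulator. A sketch
# using struct's half-precision codec; unlike hardware, the accumulation below
# is carried out in double precision rather than rounding each step to f16.
import struct
def dot2_f16(s0: int, s1: int, s2: int) -> float:
    h = lambda bits: struct.unpack('<e', struct.pack('<H', bits & 0xffff))[0]
    return h(s2) + h(s0) * h(s1) + h(s0 >> 16) * h(s1 >> 16)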
src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); @@ -11109,7 +11744,7 @@ def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11120,7 +11755,7 @@ def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11131,7 +11766,7 @@ def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11142,7 +11777,7 @@ def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? 
S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11153,7 +11788,7 @@ def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 + S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11164,7 +11799,7 @@ def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 - S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11175,7 +11810,7 @@ def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = S1.f16; # D0[15 : 0].f16 = S0.f16 S0 = Reg(s0) @@ -11188,7 +11823,7 @@ def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); @@ -11202,7 +11837,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); @@ -11216,7 +11851,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * 2.0F ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11227,7 +11862,7 @@ def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11238,7 +11873,7 @@ def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def 
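# v_bfm_b32 builds a bit-field mask (S0 = width, S1 = offset) and v_pack_b32_f16
# concatenates two f16 halves; both reduce to plain shift/mask arithmetic:
def bfm_b32(s0: int, s1: int) -> int:
    return (((1 << (s0 & 0x1f)) - 1) << (s1 & 0x1f)) & 0xffffffff

def pack_b32_f16(lo16: int, hi16: int) -> int:
    return ((hi16 & 0xffff) << 16) | (lo16 & 0xffff)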
_VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32; # for i in 0 : 31 do # tmp += S0[i].u32; @@ -11258,7 +11893,7 @@ def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); @@ -11272,7 +11907,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); @@ -11286,7 +11921,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = u32_to_u16(S0.u32); # tmp[31 : 16].u16 = u32_to_u16(S1.u32); @@ -11300,7 +11935,7 @@ def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = i32_to_i16(S0.i32); # tmp[31 : 16].i16 = i32_to_i16(S1.i32); @@ -11314,7 +11949,7 @@ def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11325,7 +11960,7 @@ def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 + S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11336,7 +11971,7 @@ def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -11348,7 +11983,7 @@ def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -11360,7 +11995,7 @@ def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where -0.0 < +0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(S0.f64) then @@ -11421,7 +12056,7 @@ def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Version of comparison where +0.0 > -0.0, differs from IEEE # if WAVE_MODE.IEEE then # if isSignalNAN(S0.f64) then @@ -11482,7 +12117,7 @@ def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * 2.0 ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11494,7 +12129,7 @@ def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 * S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -11505,7 +12140,7 @@ def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11516,7 +12151,7 @@ def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11527,7 +12162,7 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def 
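# mul_lo/mul_hi split a 64-bit product into its 32-bit halves; the signed
# variant sign-extends before multiplying (Python's >> on a negative int is
# already an arithmetic shift, matching the pseudocode's 64-bit shift):
def mul_hi_u32(s0: int, s1: int) -> int:
    return (((s0 & 0xffffffff) * (s1 & 0xffffffff)) >> 32) & 0xffffffff

def mul_hi_i32(s0: int, s1: int) -> int:
    sx = lambda v: ((v & 0xffffffff) - (1 << 32)) if v & 0x80000000 else (v & 0xffffffff)
    return ((sx(s0) * sx(s1)) >> 32) & 0xffffffff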
_VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11538,7 +12173,7 @@ def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11549,7 +12184,7 @@ def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11560,7 +12195,7 @@ def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 << S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11572,7 +12207,7 @@ def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11584,7 +12219,7 @@ def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (S1.i64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11596,7 +12231,7 @@ def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE32 then # lane = S1.u32[4 : 0].u32; @@ -11619,7 +12254,7 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 & S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11630,7 +12265,7 @@ def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, 
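# The *rev shifts take the shift amount from S0 and the value from S1 (reversed
# operand order); the 64-bit arithmetic variant needs an explicit sign-extend:
def lshlrev_b16(s0: int, s1: int) -> int:
    return (s1 << (s0 & 0xf)) & 0xffff

def ashrrev_i64(s0: int, s1: int) -> int:
    v = s1 - (1 << 64) if s1 & (1 << 63) else s1
    return (v >> (s0 & 0x3f)) & 0xffffffffffffffff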
exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 | S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11641,7 +12276,7 @@ def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 ^ S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11913,6 +12548,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_FRACT_F16: _VOP3Op_V_FRACT_F16, VOP3Op.V_SIN_F16: _VOP3Op_V_SIN_F16, VOP3Op.V_COS_F16: _VOP3Op_V_COS_F16, + VOP3Op.V_SAT_PK_U8_I16: _VOP3Op_V_SAT_PK_U8_I16, VOP3Op.V_CVT_NORM_I16_F16: _VOP3Op_V_CVT_NORM_I16_F16, VOP3Op.V_CVT_NORM_U16_F16: _VOP3Op_V_CVT_NORM_U16_F16, VOP3Op.V_NOT_B16: _VOP3Op_V_NOT_B16, @@ -11995,6 +12631,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MQSAD_U32_U8: _VOP3Op_V_MQSAD_U32_U8, VOP3Op.V_XOR3_B32: _VOP3Op_V_XOR3_B32, VOP3Op.V_MAD_U16: _VOP3Op_V_MAD_U16, + VOP3Op.V_PERM_B32: _VOP3Op_V_PERM_B32, VOP3Op.V_XAD_U32: _VOP3Op_V_XAD_U32, VOP3Op.V_LSHL_ADD_U32: _VOP3Op_V_LSHL_ADD_U32, VOP3Op.V_ADD_LSHL_U32: _VOP3Op_V_ADD_LSHL_U32, @@ -12070,7 +12707,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_XOR_B16: _VOP3Op_V_XOR_B16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12090,7 +12727,7 @@ def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12110,7 +12747,7 @@ def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
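# The carry-chain ops keep one carry bit per lane in VCC; a compact model of
# v_add_co_ci_u32 that consumes this lane's carry-in and writes its carry-out:
def add_co_ci_u32(s0: int, s1: int, vcc: int, lane: int) -> tuple[int, int]:
    tmp = (s0 & 0xffffffff) + (s1 & 0xffffffff) + ((vcc >> lane) & 1)
    carry = 1 if tmp >= 0x100000000 else 0
    vcc = (vcc & ~(1 << lane)) | (carry << lane)
    return tmp & 0xffffffff, vcc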
@@ -12130,7 +12767,7 @@ def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # D0.f32 = NAN.f32 @@ -12193,7 +12830,7 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then # D0.f64 = NAN.f64 @@ -12257,7 +12894,7 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12274,7 +12911,7 @@ def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12291,7 +12928,7 @@ def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12311,7 +12948,7 @@ def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12331,7 +12968,7 @@ def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 
1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12364,7 +13001,7 @@ VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12381,7 +13018,7 @@ def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12397,7 +13034,7 @@ def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12413,7 +13050,7 @@ def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12429,7 +13066,7 @@ def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12445,7 +13082,7 @@ def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12461,7 +13098,7 @@ def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): 
+def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12477,7 +13114,7 @@ def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12493,7 +13130,7 @@ def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # D0.b32 = tmp.b32 @@ -12509,7 +13146,7 @@ def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12526,7 +13163,7 @@ def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12542,7 +13179,7 @@ def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12558,7 +13195,7 @@ def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12574,7 +13211,7 @@ def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12590,7 +13227,7 @@ def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); @@ -12608,7 +13245,7 @@ def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; # D0.b32 = tmp.b32 @@ -12624,7 +13261,7 @@ def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; # D0.b32 = tmp.b32 @@ -12640,7 +13277,7 @@ def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); # D0.b32 = tmp.b32 @@ -12656,7 +13293,7 @@ def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); # D0.b32 = tmp.b32 @@ -12672,7 +13309,7 @@ def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -12691,7 +13328,7 @@ def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); @@ -12714,7 +13351,7 @@ def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); @@ -12745,7 +13382,7 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -12790,13 +13427,14 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT2_F32_BF16: _VOP3POp_V_DOT2_F32_BF16, } -def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -12804,9 +13442,11 @@ def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. 
@@ -12815,6 +13455,7 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- @@ -12822,9 +13463,11 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. @@ -12833,6 +13476,7 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- @@ -12840,9 +13484,11 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12850,6 +13496,7 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- @@ -12857,9 +13504,11 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. 
@@ -12868,6 +13517,7 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- @@ -12875,9 +13525,11 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12885,6 +13537,7 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- @@ -12892,9 +13545,11 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -12902,6 +13557,7 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- @@ -12909,9 +13565,11 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
@@ -12920,6 +13578,7 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- @@ -12927,9 +13586,11 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -12938,6 +13599,7 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- @@ -12945,9 +13607,11 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -12956,6 +13620,7 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- @@ -12963,9 +13628,11 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
@@ -12974,6 +13641,7 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- @@ -12981,9 +13649,11 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -12993,6 +13663,7 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- @@ -13000,9 +13671,11 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13011,6 +13684,7 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- @@ -13018,9 +13692,11 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -13030,6 +13706,7 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- @@ -13037,9 +13714,11 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -13049,6 +13728,7 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- @@ -13056,15 +13736,18 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13072,15 +13755,18 @@ def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -13088,9 +13774,11 @@ def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -13099,6 +13787,7 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- @@ -13106,9 +13795,11 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -13117,6 +13808,7 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- @@ -13124,9 +13816,11 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13134,6 +13828,7 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- @@ -13141,9 +13836,11 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -13152,6 +13849,7 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- @@ -13159,9 +13857,11 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13169,6 +13869,7 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- @@ -13176,9 +13877,11 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13186,6 +13889,7 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- @@ -13193,9 +13897,11 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13204,6 +13910,7 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13211,9 +13918,11 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13222,6 +13931,7 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13229,9 +13939,11 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
@@ -13240,6 +13952,7 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- @@ -13247,9 +13960,11 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13258,6 +13973,7 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- @@ -13265,9 +13981,11 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -13277,6 +13995,7 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -13284,9 +14003,11 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
@@ -13295,6 +14016,7 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -13302,9 +14024,11 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -13314,6 +14038,7 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -13321,9 +14046,11 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -13333,6 +14060,7 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -13340,15 +14068,18 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13356,15 +14087,18 @@ def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -13372,9 +14106,11 @@ def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -13383,6 +14119,7 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -13390,9 +14127,11 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. 
@@ -13401,6 +14140,7 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -13408,9 +14148,11 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13418,6 +14160,7 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -13425,9 +14168,11 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -13436,6 +14181,7 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -13443,9 +14189,11 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13453,6 +14201,7 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -13460,9 +14209,11 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13470,6 +14221,7 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -13477,9 +14229,11 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -13488,6 +14242,7 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -13495,9 +14250,11 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
@@ -13506,6 +14263,7 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -13513,9 +14271,11 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -13524,6 +14284,7 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -13531,9 +14292,11 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13542,6 +14305,7 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -13549,9 +14313,11 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -13561,6 +14327,7 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -13568,9 +14335,11 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13579,6 +14348,7 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -13586,9 +14356,11 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -13598,6 +14370,7 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -13605,9 +14378,11 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -13617,6 +14392,7 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -13624,15 +14400,18 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13640,9 +14419,11 @@ def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -13651,6 +14432,7 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -13658,9 +14440,11 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
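The negated float compares (V_CMP_NGE/NLG/NGT/NLE/NEQ/NLT) keep the pseudocode's `not (...)` form instead of flipping the comparison operator, and the `// With NAN inputs this is not the same operation as ...` comments say why: with a NaN operand every ordered comparison is false, so its negation is true. Python floats follow the same IEEE 754 rules, which a two-line check confirms:

nan = float("nan")
assert (not (nan >= 1.0)) is True  # what V_CMP_NGE reports for a NaN input
assert (nan < 1.0) is False        # the 'equivalent-looking' compare differs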
@@ -13669,6 +14453,7 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -13676,9 +14461,11 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13686,6 +14473,7 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -13693,9 +14481,11 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -13704,6 +14494,7 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -13711,9 +14502,11 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -13722,6 +14515,7 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -13729,9 +14523,11 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13739,6 +14535,7 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -13746,9 +14543,11 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -13757,6 +14556,7 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -13764,9 +14564,11 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -13775,6 +14577,7 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- @@ -13782,9 +14585,11 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13792,6 +14597,7 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- @@ -13799,9 +14605,11 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -13810,6 +14618,7 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- @@ -13817,9 +14626,11 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
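All of these handlers share the `D0.u64[laneId] = <cond>` idiom: one compare produces one bit, deposited into a 64-bit lane mask (D0 aliases VCC in the VOPC encoding, hence the unconditional `vcc_lane` read-back from `D0._val`). Outside the `Reg` wrapper the same update is ordinary bit surgery; a hedged sketch with an illustrative helper name:

def set_lane(mask: int, lane: int, cond: bool) -> int:
    # deposit one lane's compare result, leaving the other 63 bits alone
    return (mask | (1 << lane)) if cond else (mask & ~(1 << lane))

vcc = set_lane(0, 3, 5 < 7)
assert (vcc >> 3) & 1 == 1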
@@ -13828,6 +14639,7 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- @@ -13835,9 +14647,11 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13845,6 +14659,7 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- @@ -13852,15 +14667,18 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -13868,9 +14686,11 @@ def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -13879,6 +14699,7 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- @@ -13886,9 +14707,11 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. @@ -13897,6 +14720,7 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- @@ -13904,9 +14728,11 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13914,6 +14740,7 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- @@ -13921,9 +14748,11 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -13932,6 +14761,7 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- @@ -13939,9 +14769,11 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. @@ -13950,6 +14782,7 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- @@ -13957,9 +14790,11 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13967,6 +14802,7 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- @@ -13974,15 +14810,18 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -13990,15 +14829,18 @@ def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -14006,9 +14848,11 @@ def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. @@ -14017,6 +14861,7 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- @@ -14024,9 +14869,11 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -14035,6 +14882,7 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -14042,9 +14890,11 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14052,6 +14902,7 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -14059,9 +14910,11 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -14070,6 +14923,7 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -14077,9 +14931,11 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. 
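For orientation, here is roughly how a caller might consume the dictionaries these handlers return. The key names (`vcc_lane`, `d0_64`, `new_pc`) come from the diff itself; the state object and the update logic around it are assumptions for illustration, not part of the patch:

class WaveState:
    def __init__(self): self.vcc, self.pc = 0, 0

def apply_result(state: WaveState, lane: int, result: dict) -> None:
    if 'vcc_lane' in result:  # fold the lane's compare bit back into VCC
        state.vcc = (state.vcc & ~(1 << lane)) | (result['vcc_lane'] << lane)
    if 'new_pc' in result:    # signed absolute byte address reported by the op
        state.pc = result['new_pc']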
@@ -14088,6 +14944,7 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -14095,9 +14952,11 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14105,6 +14964,7 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -14112,15 +14972,18 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -14128,15 +14991,18 @@ def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -14144,9 +15010,11 @@ def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -14155,6 +15023,7 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -14162,9 +15031,11 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. @@ -14173,6 +15044,7 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -14180,9 +15052,11 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -14190,6 +15064,7 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -14197,9 +15072,11 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -14208,6 +15085,7 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -14215,9 +15093,11 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. @@ -14226,6 +15106,7 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -14233,9 +15114,11 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -14243,6 +15126,7 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -14250,15 +15134,18 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -14266,15 +15153,18 @@ def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'0U; # // D0 = VCC in VOPC encoding. D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 0 # --- end pseudocode --- @@ -14282,9 +15172,11 @@ def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14293,6 +15185,7 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -14300,9 +15193,11 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. @@ -14311,6 +15206,7 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -14318,9 +15214,11 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14328,6 +15226,7 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -14335,9 +15234,11 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14346,6 +15247,7 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -14353,9 +15255,11 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. @@ -14364,6 +15268,7 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -14371,9 +15276,11 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14381,6 +15288,7 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -14388,15 +15296,18 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. # D0.u64[laneId] = 1'1U; # // D0 = VCC in VOPC encoding. 
D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = 1 # --- end pseudocode --- @@ -14404,9 +15315,11 @@ def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14443,6 +15356,7 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -14462,9 +15376,11 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14501,6 +15417,7 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -14520,9 +15437,11 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
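V_CMP_CLASS_F16/F32/F64 work differently from the ordered compares: S1 is a bitmask selecting float classes, and the lane result is 1 iff S0 falls into any selected class. Bits 0 and 1 (signaling and quiet NaN) are quoted in the comments above; the remaining positions used below (-inf=2, -normal=3, ..., +inf=9) follow the usual RDNA3 class encoding and should be treated as assumptions here. A simplified sketch that ignores signaling NaNs and denormals:

import math

def cmp_class(s0: float, s1_mask: int) -> bool:
    if math.isnan(s0): cls = 1                                    # quiet NaN
    elif math.isinf(s0): cls = 2 if s0 < 0 else 9                 # -inf / +inf
    elif s0 == 0.0: cls = 5 if math.copysign(1.0, s0) < 0 else 6  # -0 / +0
    else: cls = 3 if s0 < 0 else 8                                # +/- normal
    return bool((s1_mask >> cls) & 1)

assert cmp_class(float("inf"), 1 << 9)
assert not cmp_class(-1.5, 1 << 8)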
@@ -14559,6 +15478,7 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -14578,9 +15498,11 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -14591,7 +15513,7 @@ def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14604,7 +15526,7 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -14618,7 +15540,7 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14631,7 +15553,7 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14644,7 +15566,7 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14657,7 +15579,7 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14670,7 +15592,7 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14683,7 +15605,7 @@ def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14696,7 +15618,7 @@ def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -14710,7 +15632,7 @@ def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -14724,7 +15646,7 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -14738,7 +15660,7 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -14752,7 +15674,7 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -14766,7 +15688,7 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -14780,7 +15702,7 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -14791,7 +15713,7 @@ def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -14802,7 +15724,7 @@ def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP 
result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14815,7 +15737,7 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 S0 = Reg(s0) @@ -14829,7 +15751,7 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14842,7 +15764,7 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14855,7 +15777,7 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14868,7 +15790,7 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14881,7 +15803,7 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -14894,7 +15816,7 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -14907,7 +15829,7 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -14921,7 +15843,7 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -14935,7 +15857,7 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -14949,7 +15871,7 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -14963,7 +15885,7 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -14977,7 +15899,7 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -14991,7 +15913,7 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def 
_VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15002,7 +15924,7 @@ def _VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15013,7 +15935,7 @@ def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15026,7 +15948,7 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -15040,7 +15962,7 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15053,7 +15975,7 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15066,7 +15988,7 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15079,7 +16001,7 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15092,7 +16014,7 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15105,7 +16027,7 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15118,7 +16040,7 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -15132,7 +16054,7 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -15146,7 +16068,7 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -15160,7 +16082,7 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -15174,7 +16096,7 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -15188,7 +16110,7 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -15202,7 +16124,7 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15213,7 +16135,7 @@ def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15226,7 +16148,7 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
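# For the 16-bit compares in this stretch, the S0.i16/S0.u16 accessors presumably
# read only the low half of the 32-bit source value: u16 masks, i16 additionally
# sign-extends. A minimal sketch of those accessors under that assumption:
#
#   def u16(v: int) -> int:
#       return v & 0xffff                          # zero-extend the low 16 bits
#
#   def i16(v: int) -> int:
#       v &= 0xffff
#       return v - 0x10000 if v & 0x8000 else v    # two's-complement sign extension
#
#   assert u16(0xffff8000) == 0x8000 and i16(0xffff8000) == -32768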
Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -15240,7 +16162,7 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15253,7 +16175,7 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15266,7 +16188,7 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15279,7 +16201,7 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15292,7 +16214,7 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15305,7 +16227,7 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
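# The V_CMPX handlers only surface the current lane's bit via result['exec_lane'];
# folding those bits back into the wave's EXEC mask is presumably the job of the
# per-lane dispatch loop in the emulator. A rough sketch of that fold-back, with
# the loop shape and variable names assumed rather than taken from emu.py:
#
#   new_exec = exec_mask
#   for lane in range(wave_size):
#       if not (exec_mask >> lane) & 1: continue   # inactive lanes never write
#       r = handler(s0, s1, 0, 0, scc, vcc, lane, exec_mask, literal, vgpr, {})
#       new_exec = (new_exec & ~(1 << lane)) | (r['exec_lane'] << lane)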
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -15319,7 +16241,7 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15332,7 +16254,7 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15345,7 +16267,7 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15358,7 +16280,7 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15371,7 +16293,7 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15382,7 +16304,7 @@ def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15395,7 +16317,7 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
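# On notation: the '<>' appearing in these pseudocode comments is AMD's
# "less than or greater than" operator. For integer operands it is plain
# inequality, but for floats it is an ordered compare, so it is false when
# either input is NaN. In Python terms, roughly:
#
#   def lg(a: float, b: float) -> bool:
#       return a < b or a > b   # False for NaN inputs, unlike a != b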
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -15409,7 +16331,7 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15422,7 +16344,7 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15435,7 +16357,7 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15448,7 +16370,7 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15461,7 +16383,7 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15472,7 +16394,7 @@ def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15483,7 +16405,7 @@ def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15496,7 +16418,7 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, 
s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 S0 = Reg(s0) @@ -15510,7 +16432,7 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15523,7 +16445,7 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15536,7 +16458,7 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15549,7 +16471,7 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15562,7 +16484,7 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15573,7 +16495,7 @@ def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15584,7 +16506,7 @@ def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 < S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15597,7 +16519,7 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 S0 = Reg(s0) @@ -15611,7 +16533,7 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15624,7 +16546,7 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 > S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15637,7 +16559,7 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <> S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15650,7 +16572,7 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 >= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15663,7 +16585,7 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15674,7 +16596,7 @@ def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'0U EXEC = Reg(exec_mask) laneId = lane @@ -15685,7 +16607,7 @@ def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 < S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15698,7 +16620,7 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 S0 = Reg(s0) @@ -15712,7 +16634,7 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15725,7 +16647,7 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 > S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15738,7 +16660,7 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15751,7 +16673,7 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 >= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15764,7 +16686,7 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = 1'1U EXEC = Reg(exec_mask) laneId = lane @@ -15775,7 +16697,7 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -15828,7 +16750,7 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -15881,7 +16803,7 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py index e7cc670a9b..15a92ee453 100644 --- a/extra/assembly/amd/autogen/rdna4/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py @@ -5,7 +5,7 @@ from extra.assembly.amd.autogen.rdna4 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -15,7 +15,7 @@ def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b64 = S0.b64 S0 = Reg(s0) D0 = Reg(d0) @@ -26,7 +26,7 @@ def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b32 = S0.b32 # endif @@ -40,7 +40,7 @@ def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.b64 = S0.b64 # endif @@ -55,7 +55,7 @@ def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -65,7 +65,7 @@ def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[63 : 0] = S0.u64[0 : 63] S0 = Reg(s0) D0 = Reg(d0) @@ -76,7 +76,7 @@ def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -99,7 +99,7 @@ def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -122,7 +122,7 @@ def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -145,7 +145,7 @@ def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if no ones are found # for i in 0 : 63 do @@ -168,7 +168,7 @@ def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -191,7 +191,7 @@ def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = -1; # // Set if all bits are the same # for i in 1 : 63 do @@ -214,7 +214,7 @@ def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i8)) S0 = Reg(s0) D0 = Reg(d0) @@ -224,7 +224,7 @@ def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -234,7 +234,7 @@ def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -244,7 +244,7 @@ def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'0U S0 = Reg(s0) D0 = Reg(d0) @@ -255,7 +255,7 @@ def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[S0.u32[4 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -265,7 +265,7 @@ def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[S0.u32[5 : 0]] = 1'1U S0 = Reg(s0) D0 = Reg(d0) @@ -276,7 +276,7 @@ def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32; # for i in 0 : 31 do # D0.u64[i * 2] = tmp[i]; @@ -295,7 +295,7 @@ def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < 0 ? 
-S0.i32 : S0.i32; # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -308,7 +308,7 @@ def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'0U ? 1 : 0 @@ -329,7 +329,7 @@ def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'0U ? 1 : 0 @@ -351,7 +351,7 @@ def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 31 do # tmp += S0.u32[i] == 1'1U ? 1 : 0 @@ -372,7 +372,7 @@ def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0; # for i in 0 : 63 do # tmp += S0.u64[i] == 1'1U ? 
1 : 0 @@ -394,7 +394,7 @@ def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # for i in 0 : 7 do # tmp[i] = S0.u32[i * 4 +: 4] != 0U @@ -415,7 +415,7 @@ def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # for i in 0 : 15 do # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL @@ -437,7 +437,7 @@ def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0U; # declare i : 6'U; # for i in 6'0U : 6'31U do @@ -459,7 +459,7 @@ def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 0ULL; # declare i : 6'U; # for i in 6'0U : 6'63U do @@ -482,7 +482,7 @@ def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32; # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -495,7 +495,7 @@ def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~S0.u64; # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -509,7 +509,7 @@ def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -531,7 +531,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -554,7 +554,7 @@ def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u32; @@ -576,7 +576,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination # saveexec = EXEC.u64; @@ -599,7 +599,7 @@ def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -621,7 +621,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -644,7 +644,7 @@ def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated 
result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -666,7 +666,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -689,7 +689,7 @@ def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -711,7 +711,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -734,7 +734,7 @@ def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d0_64'] = True return result -def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u32; @@ -756,7 +756,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar # saveexec = EXEC.u64; @@ -779,7 +779,7 @@ def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; @@ -801,7 +801,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -824,7 +824,7 @@ def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result['d0_64'] = True return result -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; @@ -846,7 +846,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -869,7 +869,7 @@ def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u32; @@ -891,7 +891,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise 
AND on the scalar input and the negation of the EXEC mask, store the calculated result into # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into # saveexec = EXEC.u64; @@ -914,7 +914,7 @@ def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l result['d0_64'] = True return result -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u32; @@ -936,7 +936,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the # saveexec = EXEC.u64; @@ -959,7 +959,7 @@ def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, li result['d0_64'] = True return result -def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is @@ -979,7 +979,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is
@@ -1000,7 +1020,7 @@ def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   result['d0_64'] = True
   return result

-def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
   # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
   # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -1020,7 +1040,7 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   if EXEC._val != exec_mask: result['exec'] = EXEC._val
   return result

-def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into
   # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op
   # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is
@@ -1041,9 +1041,65 @@ def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit
   result['d0_64'] = True
   return result

-def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # D0.i64 = PC + 4LL
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  D0.i64 = PC + 4
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # jump_addr = S0.i64;
+  # D0.i64 = PC + 4LL;
+  # PC = jump_addr.i64
+  S0 = Reg(s0)
+  D0 = Reg(d0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  jump_addr = S0.i64
+  D0.i64 = PC + 4
+  PC = Reg(jump_addr.i64)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # PC = S0.i64
+  S0 = Reg(s0)
+  PC = Reg(pc)
+  # --- compiled pseudocode ---
+  PC = Reg(S0.i64)
+  # --- end pseudocode ---
+  result = {'d0': d0, 'scc': scc & 1}
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
+  return result
+
+def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # If SDST is VCC then VCCZ is undefined.
   VCC = Reg(vcc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
   # --- compiled pseudocode ---
   # --- end pseudocode ---
@@ -1051,9 +1107,10 @@ def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result

-def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # If SDST is VCC then VCCZ is undefined.
   VCC = Reg(vcc)
+  VCCZ = Reg(1 if VCC._val == 0 else 0)
   # --- compiled pseudocode ---
   # --- end pseudocode ---
@@ -1061,7 +1118,7 @@ def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   return result

-def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if !InWorkgroup() then
   # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then
   # elsif barrierNumber == 0 then
@@ -1081,7 +1138,7 @@ def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   result = {'d0': d0, 'scc': scc & 1}
   return result

-def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # if !InWorkgroup() then
   # SCC = 1'0U
   # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then
@@ -1108,7 +1165,7 @@ def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask,
   result = {'d0': d0, 'scc': SCC._val & 1}
   return result

-def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u32 = 32'U({ 9'0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 5'0, BARRIER_STATE[barrierNumber &
   D0 = Reg(d0)
   # --- compiled pseudocode ---
@@ -1117,7 +1174,7 @@ def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # n = ReallocVgprs(32'I(S0[8 : 0].u32));
   # if n < 0 then
   # SCC = 1'0U
@@ -1138,7 +1195,7 @@ def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': d0, 'scc': SCC._val & 1}
   return result

-def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # S0[6:0] determines the sleep duration. The wave sleeps for (64*(S0[6:0]-1) … 64*S0[6:0]) clocks. The exact
   S0 = Reg(s0)
   # --- compiled pseudocode ---
@@ -1147,7 +1204,7 @@ def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': d0, 'scc': scc & 1}
   return result

-def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = trunc(S0.f32);
   # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then
   # D0.f32 += 1.0F
@@ -1162,7 +1219,7 @@ def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR,
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = trunc(S0.f32);
   # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then
   # D0.f32 += -1.0F
@@ -1177,7 +1234,7 @@ def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = trunc(S0.f32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1187,7 +1244,7 @@ def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = floor(S0.f32 + 0.5F);
   # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then
   # D0.f32 -= 1.0F
@@ -1202,7 +1259,7 @@ def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = i32_to_f32(S0.i32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1212,7 +1269,7 @@ def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.f32 = u32_to_f32(S0.u32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1222,7 +1279,7 @@ def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result

-def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.i32 = f32_to_i32(S0.f32)
   S0 = Reg(s0)
   D0 = Reg(d0)
@@ -1232,7 +1289,7 @@ def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
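The four PC ops added above all report control flow the same way: the 64-bit register value is folded into Python's signed range and handed back under the 'new_pc' key as an absolute byte address. A minimal sketch of that convention, assuming a dispatch driver that checks each handler's result dict for 'new_pc'; the to_signed64 and step helpers, the inst_size parameter, and the example addresses here are illustrative, not the emulator's actual driver:

def to_signed64(v: int) -> int:
  # fold an unsigned 64-bit register value into Python's signed range, mirroring
  # `_pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000`
  return v if v < 0x8000000000000000 else v - 0x10000000000000000

def step(pc: int, inst_size: int, result: dict) -> int:
  # PC-writing handlers report an absolute byte address under 'new_pc';
  # every other op falls through to the next instruction
  return result['new_pc'] if 'new_pc' in result else pc + inst_size

assert to_signed64(0x1000) == 0x1000
assert to_signed64(0xfffffffffffffffc) == -4         # wrapped (negative) target
assert step(0x100, 4, {'d0': 0, 'scc': 0}) == 0x104  # ordinary op: fall through
assert step(0x100, 4, {'new_pc': 0x200}) == 0x200    # S_SETPC_B64-style redirect

Under this reading, S_SWAPPC_B64 is the call primitive: it redirects via 'new_pc' while handing back the link address PC+4 through 'd0'/'d0_64', and S_SETPC_B64 (or S_RFE_B64) is the matching return.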
-def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -1242,7 +1299,7 @@ def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -1252,7 +1309,7 @@ def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -1262,7 +1319,7 @@ def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0[31 : 16].f16) S0 = Reg(s0) D0 = Reg(d0) @@ -1272,7 +1329,7 @@ def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -1287,7 +1344,7 @@ def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -1302,7 +1359,7 @@ def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -1312,7 +1369,7 @@ def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if 
(isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -1382,6 +1439,10 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_AND_NOT0_WREXEC_B64: _SOP1Op_S_AND_NOT0_WREXEC_B64, SOP1Op.S_AND_NOT1_WREXEC_B32: _SOP1Op_S_AND_NOT1_WREXEC_B32, SOP1Op.S_AND_NOT1_WREXEC_B64: _SOP1Op_S_AND_NOT1_WREXEC_B64, + SOP1Op.S_GETPC_B64: _SOP1Op_S_GETPC_B64, + SOP1Op.S_SETPC_B64: _SOP1Op_S_SETPC_B64, + SOP1Op.S_SWAPPC_B64: _SOP1Op_S_SWAPPC_B64, + SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64, SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32, SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64, SOP1Op.S_BARRIER_SIGNAL: _SOP1Op_S_BARRIER_SIGNAL, @@ -1406,7 +1467,7 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, } -def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1423,7 +1484,7 @@ def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1440,7 +1501,7 @@ def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 + S1.i32; # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -1457,7 +1518,7 @@ def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.i32 - S1.i32; # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); # D0.i32 = tmp.i32 @@ -1474,7 +1535,7 @@ def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1491,7 +1552,7 @@ def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - SCC.u32; # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1508,7 +1569,7 @@ def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32; # if D0.i32 < 0 then # D0.i32 = -D0.i32 @@ -1527,7 +1588,7 @@ def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1541,7 +1602,7 @@ def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 << S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1556,7 +1617,7 @@ def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 >> S1[4 : 0].u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1570,7 +1631,7 @@ def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 >> S1[5 : 0].u32); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1585,7 +1646,7 @@ def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); # SCC = D0.i32 != 0 S0 = Reg(s0) @@ -1599,7 +1660,7 @@ def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); # SCC = D0.i64 != 0LL S0 = Reg(s0) @@ -1614,7 +1675,7 @@ def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1631,7 +1692,7 @@ def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1648,7 +1709,7 @@ def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1665,7 +1726,7 @@ def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # D0.u32 = tmp.u32 @@ -1682,7 +1743,7 @@ def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 S0 = Reg(s0) @@ -1696,7 +1757,7 @@ def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32; # D0.u32 = SCC ? 
S0.u32 : S1.u32 S0 = Reg(s0) @@ -1710,7 +1771,7 @@ def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32; # D0.i32 = SCC ? S0.i32 : S1.i32 S0 = Reg(s0) @@ -1724,7 +1785,7 @@ def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32; # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) @@ -1738,7 +1799,7 @@ def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1752,7 +1813,7 @@ def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1767,7 +1828,7 @@ def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1781,7 +1842,7 @@ def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1796,7 +1857,7 @@ def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result['d0_64'] = True return result -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1810,7 +1871,7 @@ def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1825,7 +1886,7 @@ def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 & S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1839,7 +1900,7 @@ def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 & S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1854,7 +1915,7 @@ def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 | S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1868,7 +1929,7 @@ def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 | S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1883,7 +1944,7 @@ def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1897,7 +1958,7 @@ def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ~(S0.u64 ^ S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1912,7 +1973,7 @@ def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1926,7 +1987,7 @@ def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 & ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1941,7 +2002,7 @@ def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | ~S1.u32); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1955,7 +2016,7 @@ def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S0.u64 | ~S1.u64); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -1970,7 +2031,7 @@ def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); # SCC = D0.u32 != 0U S0 = Reg(s0) @@ -1984,7 +2045,7 @@ def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); # SCC = D0.i32 != 0 @@ -2001,7 +2062,7 @@ def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); # SCC = D0.u64 != 0ULL S0 = Reg(s0) @@ -2016,7 +2077,7 @@ def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); # SCC = D0.i64 != 0LL @@ -2034,7 +2095,7 @@ def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 
0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -2045,7 +2106,7 @@ def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -2057,7 +2118,7 @@ def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 * S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2068,7 +2129,7 @@ def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -2079,7 +2140,7 @@ def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -2090,7 +2151,7 @@ def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = SCC ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2102,7 +2163,7 @@ def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = SCC ? 
S0.u64 : S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2115,7 +2176,7 @@ def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2126,7 +2187,7 @@ def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2137,7 +2198,7 @@ def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2148,7 +2209,7 @@ def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { S1[15 : 0].u16, S0[31 : 16].u16 } S0 = Reg(s0) S1 = Reg(s1) @@ -2159,7 +2220,7 @@ def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2170,7 +2231,7 @@ def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2181,7 +2242,7 @@ def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2218,7 +2279,7 @@ def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2255,7 +2316,7 @@ def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2266,7 +2327,7 @@ def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2278,7 +2339,7 @@ def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2290,7 +2351,7 @@ def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -2301,7 +2362,7 @@ def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -2316,7 +2377,7 @@ def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2327,7 +2388,7 @@ def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2338,7 +2399,7 @@ def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2375,7 +2436,7 @@ def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2412,7 +2473,7 @@ def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2423,7 +2484,7 @@ def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -2434,7 +2495,7 @@ def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2475,7 +2536,7 @@ def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -2516,7 +2577,7 @@ def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2557,7 +2618,7 @@ def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -2598,7 +2659,7 @@ def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = S0.u64 + S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2610,7 +2671,7 @@ def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = S0.u64 - S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2622,7 +2683,7 @@ def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['d0_64'] = True return result -def _SOP2Op_S_MUL_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOP2Op_S_MUL_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = S0.u64 * S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2711,7 +2772,7 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_MUL_U64: _SOP2Op_S_MUL_U64, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 == S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2722,7 +2783,7 @@ def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2733,7 +2794,7 @@ def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2744,7 +2805,7 @@ def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2755,7 +2816,7 @@ def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2766,7 +2827,7 @@ def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -2777,7 +2838,7 @@ def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 == S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2788,7 +2849,7 @@ def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2799,7 +2860,7 @@ def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2810,7 +2871,7 @@ def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2821,7 +2882,7 @@ def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2832,7 +2893,7 @@ def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -2843,7 +2904,7 @@ def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2854,7 +2915,7 @@ def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2865,7 +2926,7 @@ def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U S0 = Reg(s0) S1 = Reg(s1) @@ -2876,7 +2937,7 @@ def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U S0 = Reg(s0) S1 = Reg(s1) @@ -2887,7 +2948,7 @@ def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 == S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2898,7 +2959,7 @@ def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -2909,7 +2970,7 @@ def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2920,7 +2981,7 @@ def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2931,7 +2992,7 @@ def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def 
_SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 == S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2942,7 +3003,7 @@ def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 == S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2953,7 +3014,7 @@ def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2964,7 +3025,7 @@ def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2975,7 +3036,7 @@ def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -2986,7 +3047,7 @@ def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -2997,7 +3058,7 @@ def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3008,7 +3069,7 @@ def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3019,7 +3080,7 @@ def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} 
return result -def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -3030,7 +3091,7 @@ def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -3041,7 +3102,7 @@ def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -3052,7 +3113,7 @@ def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -3063,7 +3124,7 @@ def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -3074,7 +3135,7 @@ def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -3085,7 +3146,7 @@ def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -3097,7 +3158,7 @@ def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = 
!(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -3109,7 +3170,7 @@ def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -3121,7 +3182,7 @@ def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -3133,7 +3194,7 @@ def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -3145,7 +3206,7 @@ def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -3157,7 +3218,7 @@ def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -3169,7 +3230,7 @@ def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -3181,7 +3242,7 @@ def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation 
as != S0 = Reg(s0) @@ -3193,7 +3254,7 @@ def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -3205,7 +3266,7 @@ def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -3217,7 +3278,7 @@ def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': d0, 'scc': SCC._val & 1} return result -def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # SCC = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -3278,7 +3339,7 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -3288,7 +3349,7 @@ def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Do nothing - for use by tools only # --- compiled pseudocode --- @@ -3296,7 +3357,7 @@ def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': d0, 'scc': scc & 1} return result -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if SCC then # D0.i32 = 32'I(signext(S0.i16)) # endif @@ -3310,7 +3371,7 @@ def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.i32; # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); @@ -3326,7 +3387,7 @@ def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': SCC._val & 1} return result -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -3336,15 +3397,32 @@ def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result +def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0.i64 = PC + 4LL; + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + D0 = Reg(d0) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + D0.i64 = PC + 4 + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, SOPKOp.S_VERSION: _SOPKOp_S_VERSION, SOPKOp.S_CMOVK_I32: _SOPKOp_S_CMOVK_I32, SOPKOp.S_ADDK_CO_I32: _SOPKOp_S_ADDK_CO_I32, SOPKOp.S_MULK_I32: _SOPKOp_S_MULK_I32, + SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # for i in 0U : SIMM16.u16[3 : 0].u32 do # endfor SIMM16 = Reg(literal) @@ -3355,7 +3433,7 @@ def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _var result = {'d0': d0, 'scc': scc & 1} return result -def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # instruction may be omitted. 
For wave64 the compiler may not know the status of the EXEC mask and hence # // 1 cycle delay here # // 2 cycles delay here @@ -3367,16 +3445,20 @@ def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // PC passed into trap handler points to S_TRAP itself, + # PC = TBA.i64; # // trap base address + PC = Reg(pc) # --- compiled pseudocode --- # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // barrierBit 0: reserved # // barrierBit 1: workgroup # // barrierBit 2: trap @@ -3387,14 +3469,163 @@ def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': d0, 'scc': scc & 1} return result +def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if SCC == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SCC = Reg(scc) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if SCC == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if SCC == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + SCC = Reg(scc) + SIMM16 = Reg(literal) + PC = Reg(pc) + # --- compiled pseudocode --- + if SCC == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': SCC._val & 1} + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # If VCCZ is 1 then jump to a constant offset relative to the current PC. 
+ # if VCCZ.u1 == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + VCC = Reg(vcc) + SIMM16 = Reg(literal) + PC = Reg(pc) + VCCZ = Reg(1 if VCC._val == 0 else 0) + # --- compiled pseudocode --- + if VCCZ.u1 == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # If VCCZ is 0 then jump to a constant offset relative to the current PC. + # if VCCZ.u1 == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + VCC = Reg(vcc) + SIMM16 = Reg(literal) + PC = Reg(pc) + VCCZ = Reg(1 if VCC._val == 0 else 0) + # --- compiled pseudocode --- + if VCCZ.u1 == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if EXECZ.u1 == 1'1U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + EXEC = Reg(exec_mask) + SIMM16 = Reg(literal) + PC = Reg(pc) + EXECZ = Reg(1 if EXEC._val == 0 else 0) + # --- compiled pseudocode --- + if EXECZ.u1 == 1: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + +def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # if EXECZ.u1 == 1'0U then + # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL + # else + # PC = PC + 4LL + # endif + EXEC = Reg(exec_mask) + SIMM16 = Reg(literal) + PC = Reg(pc) + EXECZ = Reg(1 if EXEC._val == 0 else 0) + # --- compiled pseudocode --- + if EXECZ.u1 == 0: + PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) + else: + PC = Reg(PC + 4) + # --- end pseudocode --- + result = {'d0': d0, 'scc': scc & 1} + if EXEC._val != exec_mask: result['exec'] = EXEC._val + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address + return result + SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, SOPPOp.S_DELAY_ALU: _SOPPOp_S_DELAY_ALU, SOPPOp.S_TRAP: _SOPPOp_S_TRAP, SOPPOp.S_BARRIER_WAIT: _SOPPOp_S_BARRIER_WAIT, + SOPPOp.S_BRANCH: _SOPPOp_S_BRANCH, + SOPPOp.S_CBRANCH_SCC0: _SOPPOp_S_CBRANCH_SCC0, + SOPPOp.S_CBRANCH_SCC1: _SOPPOp_S_CBRANCH_SCC1, + SOPPOp.S_CBRANCH_VCCZ: _SOPPOp_S_CBRANCH_VCCZ, + SOPPOp.S_CBRANCH_VCCNZ: _SOPPOp_S_CBRANCH_VCCNZ, + SOPPOp.S_CBRANCH_EXECZ: _SOPPOp_S_CBRANCH_EXECZ, + SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -3404,7 +3635,7 @@ def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -3447,7 +3678,7 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3457,7 +3688,7 @@ def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3468,7 +3699,7 @@ def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -3478,7 +3709,7 @@ def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3488,7 +3719,7 @@ def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3498,7 +3729,7 @@ def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3508,7 +3739,7 @@ def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3518,7 +3749,7 @@ def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -3528,7 +3759,7 @@ def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -3538,7 +3769,7 @@ def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -3548,7 +3779,7 @@ def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3558,7 +3789,7 @@ def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3569,7 +3800,7 @@ def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3579,7 +3810,7 @@ def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3589,7 +3820,7 @@ def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3599,7 +3830,7 @@ def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3609,7 +3840,7 @@ def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3619,7 +3850,7 @@ def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -3630,7 +3861,7 @@ def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3641,7 +3872,7 @@ def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -3657,7 +3888,7 @@ def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -3673,7 +3904,7 @@ def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && 
(S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -3689,7 +3920,7 @@ def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -3699,7 +3930,7 @@ def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3709,7 +3940,7 @@ def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3719,7 +3950,7 @@ def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -3734,7 +3965,7 @@ def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -3749,7 +3980,7 @@ def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -3764,7 +3995,7 @@ def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3774,7 +4005,7 @@ def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0): +def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3784,7 +4015,7 @@ def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -3794,7 +4025,7 @@ def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -3805,7 +4036,7 @@ def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3815,7 +4046,7 @@ def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -3826,7 +4057,7 @@ def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3837,7 +4068,7 @@ def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -3847,7 +4078,7 @@ def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -3858,7 +4089,7 @@ def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3868,7 +4099,7 @@ def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -3878,7 +4109,7 @@ def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -3888,7 +4119,7 @@ def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -3898,7 +4129,7 @@ def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3918,7 +4149,7 @@ def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -3938,7 +4169,7 @@ def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -3958,7 +4189,7 @@ def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -3975,7 +4206,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} 
return result -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -3993,7 +4224,7 @@ def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -4004,7 +4235,7 @@ def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -4021,7 +4252,7 @@ def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -4038,7 +4269,7 @@ def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 @@ -4052,7 +4283,7 @@ def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -4062,7 +4293,7 @@ def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -4072,7 +4303,7 @@ def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4082,7 +4313,7 @@ def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4092,7 +4323,7 @@ def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -4102,7 +4333,7 @@ def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4112,7 +4343,7 @@ def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4122,7 +4353,7 @@ def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4132,7 +4363,7 @@ def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4142,7 +4373,7 @@ def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.f16 = S0.f16 # else @@ -4159,7 +4390,7 @@ def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): 
+def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -4176,7 +4407,7 @@ def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -4191,7 +4422,7 @@ def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -4206,7 +4437,7 @@ def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4216,7 +4447,7 @@ def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -4231,7 +4462,7 @@ def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4241,7 +4472,7 @@ def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -4251,7 +4482,7 @@ def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ 
-4261,11 +4492,7 @@ def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # if n <= 16'0 then - # elsif n >= 16'255 then - # else - # endif); +def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 16'0; # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); @@ -4274,12 +4501,6 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) tmp = Reg(0) # --- compiled pseudocode --- - if n <= 0: - pass - elif n >= 255: - pass - else: - pass tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) @@ -4288,7 +4509,7 @@ def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_snorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4298,7 +4519,7 @@ def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_unorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -4308,7 +4529,7 @@ def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.b32; # D0.b32 = S0.b32; # S0.b32 = tmp @@ -4323,7 +4544,7 @@ def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = D0.b16; # D0.b16 = S0.b16; # S0.b16 = tmp @@ -4338,7 +4559,7 @@ def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = ~S0.u16 S0 = Reg(s0) D0 = Reg(d0) @@ -4348,7 +4569,7 @@ def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 
= Reg(s0) D0 = Reg(d0) @@ -4358,7 +4579,7 @@ def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { 16'0, S0.u16 } S0 = Reg(s0) D0 = Reg(d0) @@ -4368,7 +4589,7 @@ def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if OPSEL[1 : 0].u2 == 2'0U then # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) # elsif OPSEL[1 : 0].u2 == 2'2U then @@ -4395,7 +4616,7 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if OPSEL[1 : 0].u2 == 2'0U then # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) # elsif OPSEL[1 : 0].u2 == 2'2U then @@ -4422,7 +4643,7 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) @@ -4438,7 +4659,7 @@ def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) @@ -4539,7 +4760,7 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_PK_F32_BF8: _VOP1Op_V_CVT_PK_F32_BF8, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4553,7 +4774,7 @@ def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -4565,7 +4786,7 @@ def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4576,7 +4797,7 @@ def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4587,7 +4808,7 @@ def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4598,7 +4819,7 @@ def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -4610,7 +4831,7 @@ def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -4629,7 +4850,7 @@ def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -4640,7 +4861,7 @@ def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -4651,7 +4872,7 @@ def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -4662,7 +4883,7 @@ def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -4673,7 +4894,7 @@ def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -4684,7 +4905,7 @@ def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -4722,7 +4943,7 @@ def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -4760,7 +4981,7 @@ def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -4771,7 +4992,7 @@ def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? 
S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -4782,7 +5003,7 @@ def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4793,7 +5014,7 @@ def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -4804,7 +5025,7 @@ def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -4841,7 +5062,7 @@ def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -4878,7 +5099,7 @@ def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4889,7 +5110,7 @@ def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4900,7 +5121,7 @@ def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4911,7 +5132,7 @@ def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4922,7 +5143,7 @@ def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4933,7 +5154,7 @@ def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4944,7 +5165,7 @@ def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4955,7 +5176,7 @@ def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 << S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -4967,7 +5188,7 @@ def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -4987,7 +5208,7 @@ def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
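As context for the carry-chain pair here: V_ADD_CO_CI_U32 folds the lane's VCC bit in as a carry-in and reports an unsigned carry-out, and V_SUB_CO_CI_U32 is the borrow-chain equivalent. A minimal standalone sketch of that arithmetic, separate from the generated handlers (function names here are illustrative):

def add_co_ci_u32(s0: int, s1: int, carry_in: int) -> tuple[int, int]:
    # wide sum of two u32 operands plus the VCC carry-in bit
    tmp = (s0 & 0xffffffff) + (s1 & 0xffffffff) + (carry_in & 1)
    # result wraps to 32 bits; carry-out is the unsigned overflow
    return tmp & 0xffffffff, int(tmp >= 0x100000000)

def sub_co_ci_u32(s0: int, s1: int, carry_in: int) -> tuple[int, int]:
    # borrow-out is set when s1 + carry-in exceeds s0, unsigned
    tmp = (s0 - s1 - (carry_in & 1)) & 0xffffffff
    return tmp, int((s1 & 0xffffffff) + (carry_in & 1) > (s0 & 0xffffffff))

assert add_co_ci_u32(0xffffffff, 0, 1) == (0, 1)  # wraparound sets carry-out
assert sub_co_ci_u32(0, 1, 0) == (0xffffffff, 1)  # underflow sets borrow-out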
@@ -5007,7 +5228,7 @@ def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -5027,7 +5248,7 @@ def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -5038,7 +5259,7 @@ def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -5049,7 +5270,7 @@ def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -5060,7 +5281,7 @@ def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -5071,7 +5292,7 @@ def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -5083,7 +5304,7 @@ def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -5095,7 +5316,7 @@ def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def 
_VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -5110,7 +5331,7 @@ def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -5147,7 +5368,7 @@ def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -5184,7 +5405,7 @@ def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -5195,7 +5416,7 @@ def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -5206,7 +5427,7 @@ def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -5217,7 +5438,7 @@ def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -5228,7 +5449,7 @@ def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = 
Reg(s0) S1 = Reg(s1) @@ -5239,7 +5460,7 @@ def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -5251,7 +5472,7 @@ def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -5263,7 +5484,7 @@ def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -5274,7 +5495,7 @@ def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16); # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) S0 = Reg(s0) @@ -5339,7 +5560,7 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, } -def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. @@ -5348,15 +5569,18 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. 
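Every VOP3 compare handler now takes pc and reports it back as new_pc. The two lines added to each handler are a plain two's-complement reinterpretation of the 64-bit PC value; an equivalent standalone helper (illustrative only, not part of the patch):

def to_signed64(v: int) -> int:
    # mirrors `_pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000`
    return v if v < 0x8000000000000000 else v - 0x10000000000000000

assert to_signed64(0x1000) == 0x1000             # low addresses pass through
assert to_signed64(0xffffffffffffffff) == -1     # high bit reads as negative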
@@ -5365,15 +5589,18 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5381,15 +5608,18 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. @@ -5398,15 +5628,18 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5414,15 +5647,18 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5430,15 +5666,18 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -5447,15 +5686,18 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -5464,15 +5706,18 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
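V_CMP_O and V_CMP_U are exact complements: "ordered" means neither input is NaN, "unordered" means at least one is. A quick worked check, with math.isnan standing in for the pseudocode's isNAN:

import math

def cmp_o(a: float, b: float) -> bool:
    return not math.isnan(a) and not math.isnan(b)  # ordered
def cmp_u(a: float, b: float) -> bool:
    return math.isnan(a) or math.isnan(b)           # unordered

for x, y in [(1.0, 2.0), (math.nan, 2.0), (math.nan, math.nan)]:
    assert cmp_o(x, y) != cmp_u(x, y)               # always complementary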
@@ -5481,15 +5726,18 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5498,15 +5746,18 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -5516,15 +5767,18 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
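The "with NAN inputs this is not the same operation as <" comments are worth taking literally: every ordered comparison against NaN is false, so the negated predicates return 1 exactly where their naive counterparts return 0. A two-assert demonstration:

import math
nan = math.nan
assert not (nan >= 1.0)     # V_CMP_NGE yields 1 on a NaN input...
assert not (nan < 1.0)      # ...while V_CMP_LT yields 0 on the same input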
@@ -5533,15 +5787,18 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -5551,15 +5808,18 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -5569,15 +5829,18 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. 
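Each D0.u64[laneId] = <bool> write in these handlers updates one bit of a 64-bit wave mask. Stripped of the Reg wrapper, the bit manipulation amounts to (illustrative sketch, not the Reg class itself):

def write_lane(mask: int, lane: int, bit: bool) -> int:
    # set or clear bit `lane`, leaving the other 63 lanes intact
    return mask | (1 << lane) if bit else mask & ~(1 << lane)

assert write_lane(0, 5, True) == 0b100000
assert write_lane(0xffffffffffffffff, 0, False) == 0xfffffffffffffffe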
@@ -5586,15 +5849,18 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -5603,15 +5869,18 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5619,15 +5888,18 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. 
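How a caller folds these per-lane results back into a wave-wide VCC is sketched below. This is a schematic of the contract (exec-masked lanes, one result bit per active lane), not emu.py's actual dispatch loop; pred, s0 and s1 are illustrative names:

def wave_cmp(pred, s0, s1, exec_mask, n_lanes=32):
    # build a wave-wide mask from a per-lane predicate; inactive lanes stay 0
    vcc = 0
    for lane in range(n_lanes):
        if (exec_mask >> lane) & 1 and pred(s0[lane], s1[lane]):
            vcc |= 1 << lane
    return vcc

assert wave_cmp(lambda a, b: a < b, [0.0, 2.0], [1.0, 1.0], exec_mask=0b11, n_lanes=2) == 0b01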
@@ -5636,15 +5908,18 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5652,15 +5927,18 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5668,15 +5946,18 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -5685,15 +5966,18 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -5702,15 +5986,18 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5719,15 +6006,18 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -5736,15 +6026,18 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -5754,15 +6047,18 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -5771,15 +6067,18 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -5789,15 +6088,18 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -5807,15 +6109,18 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. @@ -5824,15 +6129,18 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -5841,15 +6149,18 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -5857,15 +6168,18 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. @@ -5874,15 +6188,18 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5890,15 +6207,18 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -5906,15 +6226,18 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. 
Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -5923,15 +6246,18 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -5940,15 +6266,18 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -5957,15 +6286,18 @@ def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
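The "// With NAN inputs this is not the same operation as <" comments in the hunks above are the whole reason the NGE/NLG/NGT/NLE/NEQ/NLT handlers exist alongside LT/NE/LE/GT/EQ/GE: the two families only diverge on unordered inputs. A standalone Python check of that claim (a sketch, not emulator code):

import math

x, y = math.nan, 1.0
print(not (x >= y))  # True  -> V_CMP_NGE_F32 sets the lane bit
print(x < y)         # False -> V_CMP_LT_F32 leaves it clear
# for ordered inputs the two forms agree, so only NaN distinguishes them
assert all((not (a >= b)) == (a < b) for a, b in [(1.0, 2.0), (2.0, 1.0), (1.0, 1.0)])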
@@ -5974,15 +6306,18 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -5992,15 +6327,18 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -6009,15 +6347,18 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -6027,15 +6368,18 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -6045,15 +6389,18 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -6062,15 +6409,18 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
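Every V_CMP hunk in this range appends the same two lines: read PC back out of its Reg wrapper and report it under 'new_pc' as a signed absolute byte address. The conditional is just a 64-bit two's-complement reinterpretation; a minimal standalone sketch (to_signed64 is a hypothetical name, not an emulator helper):

def to_signed64(v: int) -> int:
    # matches: _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
    return v if v < 1 << 63 else v - (1 << 64)

assert to_signed64(0x1000) == 0x1000          # small addresses pass through unchanged
assert to_signed64(0xFFFFFFFFFFFFFFF8) == -8  # top-bit-set values come back negative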
@@ -6079,15 +6429,18 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6095,15 +6448,18 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -6112,15 +6468,18 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
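The signed *_I16 hunks and the unsigned *_U16 hunks around this point compare the same 16 stored bits; only the reinterpretation differs. A standalone sketch of that distinction (as_i16 is a hypothetical helper, not the emulator's Reg field accessor):

import struct

def as_i16(bits: int) -> int:
    # reinterpret the low 16 bits as a two's-complement signed value
    return struct.unpack('<h', struct.pack('<H', bits & 0xFFFF))[0]

a, b = 0x8000, 0x0001
print(as_i16(a) < as_i16(b))  # True:  V_CMP_LT_I16 sees -32768 < 1
print(a < b)                  # False: V_CMP_LT_U16 sees 32768 < 1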
@@ -6129,15 +6488,18 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6145,15 +6507,18 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -6162,15 +6527,18 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -6179,15 +6547,18 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 <= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6195,15 +6566,18 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 > S1.u16; # // D0 = VCC in VOPC encoding. @@ -6212,15 +6586,18 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u16 <> S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -6229,15 +6606,18 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u16 >= S1.u16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6245,15 +6625,18 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 < S1.i32; # // D0 = VCC in VOPC encoding. @@ -6262,15 +6645,18 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i32 == S1.i32; # // D0 = VCC in VOPC encoding. 
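Two idioms repeat through all of these handlers: D0.u64[laneId] = <cond> writes one bit of a 64-bit per-lane mask, and (VCC._val >> lane) & 1 reads one back out for the 'vcc_lane' result key. On a plain Python int that amounts to the following (a sketch of the bit manipulation only, not the Reg class):

MASK64 = (1 << 64) - 1

def set_lane(mask: int, lane: int, cond: bool) -> int:
    # D0.u64[laneId] = cond: set or clear one bit of the 64-bit per-lane mask
    bit = 1 << lane
    return (mask | bit) if cond else (mask & ~bit & MASK64)

def get_lane(mask: int, lane: int) -> int:
    # (VCC._val >> lane) & 1: read a single lane's bit back out
    return (mask >> lane) & 1

vcc = set_lane(0, 3, True)
assert get_lane(vcc, 3) == 1 and get_lane(vcc, 2) == 0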
@@ -6279,15 +6665,18 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 <= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6295,15 +6684,18 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 > S1.i32; # // D0 = VCC in VOPC encoding. @@ -6312,15 +6704,18 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i32 <> S1.i32; # // D0 = VCC in VOPC encoding. 
@@ -6329,15 +6724,18 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i32 >= S1.i32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6345,15 +6743,18 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 < S1.u32; # // D0 = VCC in VOPC encoding. @@ -6362,15 +6763,18 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u32 == S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -6379,15 +6783,18 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6395,15 +6802,18 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -6412,15 +6822,18 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -6429,15 +6842,18 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6445,15 +6861,18 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -6462,15 +6881,18 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -6479,15 +6901,18 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6495,15 +6920,18 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -6512,15 +6940,18 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -6529,15 +6960,18 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6545,15 +6979,18 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -6562,15 +6999,18 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -6579,15 +7019,18 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6595,15 +7038,18 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. @@ -6612,15 +7058,18 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -6629,15 +7078,18 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -6645,15 +7097,18 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6690,6 +7145,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -6708,9 +7164,11 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
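The V_CMP_CLASS_* handlers differ from the ordinary compares: S1 is a bitmask of float classes, and the isSignalNAN/isQuietNAN branches select which of its bits to report. Assuming the usual IEEE-754 binary32 convention (quiet bit = mantissa MSB), the two NaN tests come down to the following sketch (hypothetical helpers; the emulator's own isSignalNAN/isQuietNAN are defined elsewhere):

def is_nan_f32(bits: int) -> bool:
    return ((bits >> 23) & 0xFF) == 0xFF and (bits & 0x7FFFFF) != 0

def is_quiet_nan_f32(bits: int) -> bool:
    return is_nan_f32(bits) and bool(bits & 0x400000)   # quiet bit set

def is_signaling_nan_f32(bits: int) -> bool:
    return is_nan_f32(bits) and not bits & 0x400000     # quiet bit clear

assert is_quiet_nan_f32(0x7FC00000)       # canonical quiet NaN
assert is_signaling_nan_f32(0x7F800001)   # all-ones exponent, quiet bit clear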
@@ -6747,6 +7205,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -6765,9 +7224,11 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -6804,6 +7265,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -6822,9 +7284,11 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6837,7 +7301,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -6851,7 +7315,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6864,7 +7328,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6877,7 +7341,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6890,7 +7354,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -6903,7 +7367,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6916,7 +7380,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -6929,7 +7393,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -6943,7 +7407,7 @@ def 
_VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -6957,7 +7421,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -6971,7 +7435,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -6985,7 +7449,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -6999,7 +7463,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7013,7 +7477,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7026,7 +7490,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal 
to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 S0 = Reg(s0) @@ -7040,7 +7504,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7053,7 +7517,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7066,7 +7530,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7079,7 +7543,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -7092,7 +7556,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -7105,7 +7569,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -7118,7 +7582,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -7132,7 +7596,7 @@ 
def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7146,7 +7610,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7160,7 +7624,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7174,7 +7638,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7188,7 +7652,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7202,7 +7666,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7215,7 +7679,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is 
equal to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -7229,7 +7693,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7242,7 +7706,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7255,7 +7719,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7268,7 +7732,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -7281,7 +7745,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7294,7 +7758,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -7307,7 +7771,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -7321,7 +7785,7 @@ def 
_VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -7335,7 +7799,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -7349,7 +7813,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -7363,7 +7827,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -7377,7 +7841,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -7391,7 +7855,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7404,7 +7868,7 @@ def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal 
to the second input. Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -7418,7 +7882,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7431,7 +7895,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7444,7 +7908,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7457,7 +7921,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -7470,7 +7934,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7483,7 +7947,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -7497,7 +7961,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7510,7 +7974,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7523,7 +7987,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7536,7 +8000,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -7549,7 +8013,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7562,7 +8026,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -7576,7 +8040,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7589,7 +8053,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7602,7 +8066,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7615,7 +8079,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -7628,7 +8092,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7641,7 +8105,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 S0 = Reg(s0) @@ -7655,7 +8119,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7668,7 +8132,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7681,7 +8145,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7694,7 +8158,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -7707,7 +8171,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 < S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7720,7 +8184,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 S0 = Reg(s0) @@ -7734,7 +8198,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7747,7 +8211,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 > S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7760,7 +8224,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <> S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7773,7 +8237,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 >= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -7786,7 +8250,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 < S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7799,7 +8263,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 S0 = Reg(s0) @@ -7813,7 +8277,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7826,7 +8290,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 > S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7839,7 +8303,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7852,7 +8316,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 >= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -7865,7 +8329,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -7918,7 +8382,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -7971,7 +8435,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. 
@@ -8024,7 +8488,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b32 = S0.b32 S0 = Reg(s0) D0 = Reg(d0) @@ -8034,7 +8498,7 @@ def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE64 then # // 64 lanes @@ -8077,7 +8541,7 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter if EXEC._val != exec_mask: result['exec'] = EXEC._val return result -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f64_to_i32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8087,7 +8551,7 @@ def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = i32_to_f64(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8098,7 +8562,7 @@ def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = i32_to_f32(S0.i32) S0 = Reg(s0) D0 = Reg(d0) @@ -8108,7 +8572,7 @@ def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8118,7 +8582,7 @@ def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f32_to_u32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8128,7 +8592,7 @@ def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8138,7 +8602,7 @@ def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = f32_to_f16(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8148,7 +8612,7 @@ def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f16_to_f32(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8158,7 +8622,7 @@ def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) S0 = Reg(s0) D0 = Reg(d0) @@ -8168,7 +8632,7 @@ def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = f32_to_i32(floor(S0.f32)) S0 = Reg(s0) D0 = Reg(d0) @@ -8178,7 +8642,7 @@ def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = f64_to_f32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8188,7 +8652,7 @@ def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = f32_to_f64(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8199,7 +8663,7 @@ def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[7 : 0].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8209,7 +8673,7 @@ def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[15 : 8].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8219,7 +8683,7 @@ def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[23 : 16].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8229,7 +8693,7 @@ def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = u32_to_f32(S0[31 : 24].u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8239,7 +8703,7 @@ def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = f64_to_u32(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8249,7 +8713,7 @@ def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = u32_to_f64(S0.u32) S0 = Reg(s0) D0 = Reg(d0) @@ -8260,7 +8724,7 @@ def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8271,7 +8735,7 @@ def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += 1.0 @@ -8287,7 +8751,7 @@ def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = floor(S0.f64 + 0.5); # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then # D0.f64 -= 1.0 @@ -8303,7 +8767,7 @@ def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR 
result['d0_64'] = True return result -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = trunc(S0.f64); # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then # D0.f64 += -1.0 @@ -8319,7 +8783,7 @@ def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.b16 = S0.b16 S0 = Reg(s0) D0 = Reg(d0) @@ -8329,7 +8793,7 @@ def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + -floor(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8339,7 +8803,7 @@ def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8349,7 +8813,7 @@ def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += 1.0F @@ -8364,7 +8828,7 @@ def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = floor(S0.f32 + 0.5F); # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then # D0.f32 -= 1.0F @@ -8379,7 +8843,7 @@ def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = trunc(S0.f32); # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then # D0.f32 += -1.0F @@ -8394,7 +8858,7 @@ def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): 
# D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8404,7 +8868,7 @@ def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8414,7 +8878,7 @@ def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -8424,7 +8888,7 @@ def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32; # // Can only raise integer DIV_BY_ZERO exception S0 = Reg(s0) @@ -8435,7 +8899,7 @@ def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8445,7 +8909,7 @@ def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / S0.f64 S0 = Reg(s0) D0 = Reg(d0) @@ -8456,7 +8920,7 @@ def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = 1.0 / sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8467,7 +8931,7 @@ def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -8477,7 +8941,7 @@ def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # 
D0.f64 = sqrt(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8488,7 +8952,7 @@ def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8498,7 +8962,7 @@ def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8508,7 +8972,7 @@ def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~S0.u32 S0 = Reg(s0) D0 = Reg(d0) @@ -8518,7 +8982,7 @@ def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32[31 : 0] = S0.u32[0 : 31] S0 = Reg(s0) D0 = Reg(d0) @@ -8528,7 +8992,7 @@ def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8548,7 +9012,7 @@ def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if no ones are found # for i in 0 : 31 do @@ -8568,7 +9032,7 @@ def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = -1; # // Set if all bits are the same # for i in 1 : 31 do @@ -8588,7 +9052,7 @@ def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.i32 = 0 # else @@ -8605,7 +9069,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then # D0.f64 = S0.f64 # else @@ -8623,7 +9087,7 @@ def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + -floor(S0.f64) S0 = Reg(s0) D0 = Reg(d0) @@ -8634,7 +9098,7 @@ def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.i32 = 0 # else @@ -8651,7 +9115,7 @@ def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then # D0.f32 = S0.f32 # else @@ -8668,7 +9132,7 @@ def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # addr = SRC0.u32; # // Raw value from instruction # D0.b32 = VGPR[laneId][addr].b32 @@ -8682,7 +9146,7 @@ def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = u16_to_f16(S0.u16) S0 = Reg(s0) D0 = Reg(d0) @@ -8692,7 +9156,7 @@ def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = i16_to_f16(S0.i16) S0 = Reg(s0) D0 = Reg(d0) @@ -8702,7 +9166,7 @@ def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_u16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8712,7 +9176,7 @@ def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_i16(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8722,7 +9186,7 @@ def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16 S0 = Reg(s0) D0 = Reg(d0) @@ -8732,7 +9196,7 @@ def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8742,7 +9206,7 @@ def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8752,7 +9216,7 @@ def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8762,7 +9226,7 @@ def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8772,7 +9236,7 @@ def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # 
D0.f16 = S0.f16 # else @@ -8789,7 +9253,7 @@ def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then # D0.i16 = 16'0 # else @@ -8806,7 +9270,7 @@ def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += -16'1.0 @@ -8821,7 +9285,7 @@ def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16); # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then # D0.f16 += 16'1.0 @@ -8836,7 +9300,7 @@ def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = trunc(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8846,7 +9310,7 @@ def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = floor(S0.f16 + 16'0.5); # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then # D0.f16 -= 16'1.0 @@ -8861,7 +9325,7 @@ def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + -floor(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8871,7 +9335,7 @@ def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8881,7 +9345,7 @@ def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result 
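Every handler in this generated block shares one calling convention, and the mechanical change running through these hunks appends a trailing pc=0 keyword to it, so the emulator can thread the program counter through every op uniformly instead of special-casing the few that read it. A minimal sketch of a per-lane dispatcher under that convention; run_vop3_lane and the flat VGPR model are illustrative assumptions, not the emulator's actual dispatch code:

    # Hypothetical per-lane dispatch matching the handler signatures above.
    # VGPR is a list of per-lane register files; handlers return a result dict.
    def run_vop3_lane(handler, VGPR, lane, srcs, vdst_idx,
                      scc=0, vcc=0, exec_mask=~0, literal=0, pc=0):
        s0, s1, s2 = srcs                        # resolved source operand values
        d0 = VGPR[lane][vdst_idx]                # prior dest value (read by *_FMAC ops)
        res = handler(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, {},
                      vdst_idx=vdst_idx, pc=pc)  # every handler now accepts pc
        VGPR[lane][vdst_idx] = res['d0'] & 0xffffffff
        if res.get('d0_64'):                     # 64-bit results span two VGPRs
            VGPR[lane][vdst_idx + 1] = (res['d0'] >> 32) & 0xffffffff
        return res.get('scc', scc), res.get('vcc_lane')

Existing callers that never pass pc keep working because the new parameter defaults to 0, which is why it is threaded as a trailing keyword rather than a required positional.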
-def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) S0 = Reg(s0) D0 = Reg(d0) @@ -8891,11 +9355,7 @@ def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): - # if n <= 16'0 then - # elsif n >= 16'255 then - # else - # endif); +def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 16'0; # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); @@ -8904,12 +9364,6 @@ def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) tmp = Reg(0) # --- compiled pseudocode --- - if n <= 0: - pass - elif n >= 255: - pass - else: - pass tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) @@ -8918,7 +9372,7 @@ def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = f16_to_snorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8928,7 +9382,7 @@ def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = f16_to_unorm(S0.f16) S0 = Reg(s0) D0 = Reg(d0) @@ -8938,7 +9392,7 @@ def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = ~S0.u16 S0 = Reg(s0) D0 = Reg(d0) @@ -8948,7 +9402,7 @@ def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(signext(S0.i16)) S0 = Reg(s0) D0 = Reg(d0) @@ -8958,7 +9412,7 @@ def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0 = { 16'0, S0.u16 } S0 = Reg(s0) D0 = Reg(d0) @@ -8968,7 +9422,7 @@ def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = 
{'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if OPSEL[1 : 0].u2 == 2'0U then # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) # elsif OPSEL[1 : 0].u2 == 2'2U then @@ -8995,7 +9449,7 @@ def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if OPSEL[1 : 0].u2 == 2'0U then # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) # elsif OPSEL[1 : 0].u2 == 2'2U then @@ -9022,7 +9476,7 @@ def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) @@ -9038,7 +9492,7 @@ def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) @@ -9054,7 +9508,7 @@ def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9068,7 +9522,7 @@ def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 + S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -9080,7 +9534,7 @@ def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 + S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9091,7 +9545,7 @@ def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 - S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9102,7 +9556,7 @@ def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S1.f32 - S0.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9113,7 +9567,7 @@ def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -9125,7 +9579,7 @@ def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = 0.0F @@ -9144,7 +9598,7 @@ def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -9155,7 +9609,7 @@ def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) S0 = Reg(s0) S1 = Reg(s1) @@ -9166,7 +9620,7 @@ def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -9177,7 +9631,7 @@ def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) S0 = Reg(s0) S1 = Reg(s1) @@ -9188,7 +9642,7 @@ def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -9199,7 +9653,7 @@ def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -9237,7 +9691,7 @@ def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -9275,7 +9729,7 @@ def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9286,7 +9740,7 @@ def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 >= S1.i32 ? 
S0.i32 : S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9297,7 +9751,7 @@ def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9308,7 +9762,7 @@ def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9319,7 +9773,7 @@ def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -9356,7 +9810,7 @@ def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -9393,7 +9847,7 @@ def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 << S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9404,7 +9858,7 @@ def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S1.u32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9415,7 +9869,7 @@ def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = (S1.i32 >> S0[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9426,7 +9880,7 @@ def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 & S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9437,7 +9891,7 @@ def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9448,7 +9902,7 @@ def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9459,7 +9913,7 @@ def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ~(S0.u32 ^ S1.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9470,7 +9924,7 @@ def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 << S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -9482,7 +9936,7 @@ def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9493,7 +9947,7 @@ def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 - S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9504,7 +9958,7 @@ def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S1.u32 - S0.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9515,7 +9969,7 @@ def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_F32(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, D0.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -9526,7 +9980,7 @@ def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # prev_mode = ROUND_MODE; # tmp[15 : 0].f16 = f32_to_f16(S0.f32); # tmp[31 : 16].f16 = f32_to_f16(S1.f32); @@ -9541,7 +9995,7 @@ def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -9578,7 +10032,7 @@ def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -9615,7 +10069,7 @@ def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 + S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9626,7 +10080,7 @@ def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 - S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9637,7 +10091,7 @@ def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S1.f16 - S0.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -9648,7 +10102,7 @@ def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ 
-9659,7 +10113,7 @@ def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, D0.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -9670,7 +10124,7 @@ def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) S0 = Reg(s0) S1 = Reg(s1) @@ -9681,7 +10135,7 @@ def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # // DX9 rules, 0.0 * x = 0.0 # D0.f32 = S2.f32 @@ -9701,7 +10155,7 @@ def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -9713,7 +10167,7 @@ def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -9725,7 +10179,7 @@ def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9774,7 +10228,7 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap S coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
# // S0.f = x @@ -9816,7 +10270,7 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = cubemap T coordinate. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9851,7 +10305,7 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // D0.f = 2.0 * cubemap major axis. # // XYZ coordinate is given in (S0.f, S1.f, S2.f). # // S0.f = x @@ -9879,7 +10333,7 @@ def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) S0 = Reg(s0) S1 = Reg(s1) @@ -9891,7 +10345,7 @@ def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) S0 = Reg(s0) @@ -9906,7 +10360,7 @@ def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) S0 = Reg(s0) S1 = Reg(s1) @@ -9918,7 +10372,7 @@ def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = fma(S0.f32, S1.f32, S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -9930,7 +10384,7 @@ def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = fma(S0.f64, S1.f64, S2.f64) S0 = Reg(s0) S1 = Reg(s1) @@ -9943,7 +10397,7 @@ def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result['d0_64'] = True return result -def 
_VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); @@ -9964,7 +10418,7 @@ def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -9976,7 +10430,7 @@ def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) S0 = Reg(s0) S1 = Reg(s1) @@ -9988,7 +10442,7 @@ def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || # isNAN(64'F(S2.f32))) then # D0.f32 = -MAX_FLOAT_F32 @@ -10008,7 +10462,7 @@ def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10020,7 +10474,7 @@ def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10032,7 +10486,7 @@ def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10044,7 +10498,7 @@ def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = 
{'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10056,7 +10510,7 @@ def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then # D0.i32 = v_max_i32(S1.i32, S2.i32) # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then @@ -10079,7 +10533,7 @@ def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then # D0.u32 = v_max_u32(S1.u32, S2.u32) # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then @@ -10102,7 +10556,7 @@ def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -10126,7 +10580,7 @@ def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10138,7 +10592,7 @@ def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); @@ -10158,7 +10612,7 @@ def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 S0 = Reg(s0) @@ -10171,7 +10625,7 @@ def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def 
_VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); # D0.u32 = tmp @@ -10188,7 +10642,7 @@ def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f32) ^ sign(S2.f32)); # if isNAN(64'F(S2.f32)) then # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) @@ -10241,7 +10695,7 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f64) ^ sign(S2.f64)); # if isNAN(S2.f64) then # D0.f64 = cvtToQuietNAN(S2.f64) @@ -10295,7 +10749,7 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10307,7 +10761,7 @@ def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10319,7 +10773,7 @@ def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10331,7 +10785,7 @@ def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10343,7 +10797,7 @@ def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def 
_VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10355,7 +10809,7 @@ def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10367,7 +10821,7 @@ def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10379,7 +10833,7 @@ def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10391,7 +10845,7 @@ def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then # D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32) # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then @@ -10418,7 +10872,7 @@ def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then # D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16) # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then @@ -10445,7 +10899,7 @@ def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) # else @@ -10467,7 +10921,7 @@ def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, V if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if VCC.u64[laneId] then # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) # else @@ -10490,7 +10944,7 @@ def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result -def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # // UNSIGNED comparison # tmp = S2.u32; # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); @@ -10514,7 +10968,7 @@ def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -10536,7 +10990,7 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['d0_64'] = True return result -def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); @@ -10558,7 +11012,7 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); @@ -10579,7 +11033,7 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10591,7 +11045,7 @@ def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U16(s0, s1, s2, 
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 + S2.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -10603,7 +11057,25 @@ def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); + # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); + # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); + # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) + S0 = Reg(s0) + S1 = Reg(s1) + S2 = Reg(s2) + D0 = Reg(d0) + # --- compiled pseudocode --- + D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) + D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) + D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) + D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + return result + +def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10615,7 +11087,7 @@ def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10627,7 +11099,7 @@ def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10639,7 +11111,7 @@ def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = fma(S0.f16, S1.f16, S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -10651,7 +11123,7 @@ def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -10663,7 +11135,7 @@ def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10675,7 +11147,7 @@ def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) S0 = Reg(s0) S1 = Reg(s1) @@ -10687,7 +11159,7 @@ def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -10699,7 +11171,7 @@ def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then # D0.i16 = v_max_i16(S1.i16, S2.i16) # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then @@ -10722,7 +11194,7 @@ def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then # D0.u16 = v_max_u16(S1.u16, S2.u16) # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then @@ -10745,7 +11217,7 @@ def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 * S1.i16 + S2.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -10757,7 +11229,7 @@ def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # sign_out = (sign(S1.f16) ^ sign(S2.f16)); # if isNAN(64'F(S2.f16)) then # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) @@ -10802,7 +11274,7 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 + S1.u32 + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10814,7 +11286,7 @@ def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10826,7 +11298,7 @@ def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10838,7 +11310,7 @@ def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (S0.u32 | S1.u32 | S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10850,7 +11322,7 @@ def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -10862,7 +11334,7 @@ def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -10874,7 +11346,7 @@ def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = VCC.u64[laneId] ? 
S1.u16 : S0.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -10888,7 +11360,7 @@ def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10900,7 +11372,7 @@ def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) S0 = Reg(s0) S1 = Reg(s1) @@ -10912,7 +11384,7 @@ def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10924,7 +11396,7 @@ def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) S0 = Reg(s0) S1 = Reg(s1) @@ -10936,7 +11408,7 @@ def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f16; # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; @@ -10955,7 +11427,7 @@ def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.bf16; # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; @@ -10974,7 +11446,7 @@ def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10986,7 +11458,7 @@ def _VOP3Op_V_MINMAX_NUM_F32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -10998,7 +11470,7 @@ def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11010,7 +11482,7 @@ def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11022,7 +11494,7 @@ def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -11034,7 +11506,7 @@ def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) S0 = Reg(s0) S1 = Reg(s1) @@ -11046,7 +11518,7 @@ def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11058,7 +11530,7 @@ def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) S0 = Reg(s0) S1 = Reg(s1) @@ -11070,7 +11542,7 @@ def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite 
result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = pow(2.0F, S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11080,7 +11552,7 @@ def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = pow(16'2.0, S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11092,7 +11564,7 @@ def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = log2(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11102,7 +11574,7 @@ def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = log2(S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11114,7 +11586,7 @@ def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / S0.f32 S0 = Reg(s0) D0 = Reg(d0) @@ -11124,7 +11596,7 @@ def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / S0.f16; # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11136,7 +11608,7 @@ def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = 1.0F / sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11146,7 +11618,7 @@ def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = 16'1.0 / sqrt(S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11158,7 
+11630,7 @@ def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = sqrt(S0.f32) S0 = Reg(s0) D0 = Reg(d0) @@ -11168,7 +11640,7 @@ def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f16 = sqrt(S0.f16); # D0[31 : 16] = 16'0x0 S0 = Reg(s0) @@ -11180,7 +11652,7 @@ def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 + S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11191,7 +11663,7 @@ def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 - S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11202,7 +11674,7 @@ def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 * S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11213,7 +11685,7 @@ def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); @@ -11227,7 +11699,7 @@ def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); @@ -11241,7 +11713,7 @@ def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11252,7 +11724,7 @@ def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11263,7 +11735,7 @@ def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -11274,7 +11746,7 @@ def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11285,7 +11757,7 @@ def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 + S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11296,7 +11768,7 @@ def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = S0.i16 - S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -11307,7 +11779,7 @@ def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0[31 : 16].f16 = S1.f16; # D0[15 : 0].f16 = S0.f16 S0 = Reg(s0) @@ -11320,7 +11792,7 @@ def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); @@ -11334,7 +11806,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); @@ -11348,7 +11820,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f32 = S0.f32 * 2.0F ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11359,7 +11831,7 @@ def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11370,7 +11842,7 @@ def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32; # for i in 0 : 31 do # tmp += S0[i].u32; @@ -11390,7 +11862,7 @@ def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); @@ -11404,7 +11876,7 @@ def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); @@ -11418,7 +11890,7 @@ def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lit result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = u32_to_u16(S0.u32); # tmp[31 : 16].u16 = u32_to_u16(S1.u32); @@ -11432,7 +11904,7 @@ def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_CVT_PK_I16_I32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = i32_to_i16(S0.i32); # tmp[31 : 16].i16 = i32_to_i16(S1.i32); @@ -11446,7 +11918,7 @@ def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': d0, 'scc': scc & 1} return result -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 - S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11457,7 +11929,7 @@ def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = S0.i32 + S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11468,7 +11940,7 @@ def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.f64 = S0.f64 * 2.0 ** S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -11480,7 +11952,7 @@ def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR result['d0_64'] = True return result -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = S0.u32 * S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -11491,7 +11963,7 @@ def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11502,7 +11974,7 @@ def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) S0 = Reg(s0) S1 = Reg(s1) @@ -11513,7 +11985,7 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11524,7 +11996,7 @@ def 
_VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11535,7 +12007,7 @@ def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i16 = (S1.i16 >> S0[3 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11546,7 +12018,7 @@ def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64 = (S1.u64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11558,7 +12030,7 @@ def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.i64 = (S1.i64 >> S0[5 : 0].u32) S0 = Reg(s0) S1 = Reg(s1) @@ -11570,7 +12042,7 @@ def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -11612,7 +12084,7 @@ def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then # TRAPSTS.INVALID = 1 # endif; @@ -11654,7 +12126,7 @@ def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['d0_64'] = True return result -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare lane : 32'U; # if WAVE32 then # lane = S1.u32[4 : 0].u32; @@ -11677,7 +12149,7 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 & S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11688,7 +12160,7 @@ def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 | S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11699,7 +12171,7 @@ def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _ result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S0.u16 ^ S1.u16) S0 = Reg(s0) S1 = Reg(s1) @@ -11710,7 +12182,7 @@ def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -11751,7 +12223,7 @@ def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then # TRAPSTS.INVALID = 1 # endif; @@ -11792,7 +12264,7 @@ def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -11833,7 +12305,7 @@ def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then # TRAPSTS.INVALID = 1 # endif; @@ -12205,6 +12677,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MQSAD_U32_U8: _VOP3Op_V_MQSAD_U32_U8, VOP3Op.V_XOR3_B32: _VOP3Op_V_XOR3_B32, VOP3Op.V_MAD_U16: _VOP3Op_V_MAD_U16, + VOP3Op.V_PERM_B32: _VOP3Op_V_PERM_B32, VOP3Op.V_XAD_U32: _VOP3Op_V_XAD_U32, VOP3Op.V_LSHL_ADD_U32: _VOP3Op_V_LSHL_ADD_U32, VOP3Op.V_ADD_LSHL_U32: _VOP3Op_V_ADD_LSHL_U32, @@ -12292,7 +12765,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MAXIMUM_F16: _VOP3Op_V_MAXIMUM_F16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def 
_VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12312,7 +12785,7 @@ def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12332,7 +12805,7 @@ def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12352,7 +12825,7 @@ def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then # D0.f32 = NAN.f32 @@ -12415,7 +12888,7 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC = 0x0LL; # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then # D0.f64 = NAN.f64 @@ -12479,7 +12952,7 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result['d0_64'] = True return result -def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12496,7 +12969,7 @@ def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) S0 = Reg(s0) S1 = Reg(s1) @@ -12513,7 +12986,7 @@ def 
_VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera result['d1'] = D1._val & 1 return result -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = 64'U(S0.u32) + 64'U(S1.u32); # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. @@ -12533,7 +13006,7 @@ def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S0.u32 - S1.u32; # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12553,7 +13026,7 @@ def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['vcc_lane'] = (VCC._val >> lane) & 1 return result -def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S1.u32 - S0.u32; # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. @@ -12586,7 +13059,7 @@ VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; @@ -12604,7 +13077,7 @@ def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; # D0.b32 = tmp.b32 @@ -12620,7 +13093,7 @@ def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; @@ -12637,7 +13110,7 @@ def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
_vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; @@ -12654,7 +13127,7 @@ def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12670,7 +13143,7 @@ def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12686,7 +13159,7 @@ def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); # D0.b32 = tmp.b32 @@ -12702,7 +13175,7 @@ def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; @@ -12719,7 +13192,7 @@ def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? 
S0[31 : 16].i16 : S1[31 : 16].i16; @@ -12736,7 +13209,7 @@ def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; @@ -12754,7 +13227,7 @@ def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; @@ -12771,7 +13244,7 @@ def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; @@ -12788,7 +13261,7 @@ def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; @@ -12805,7 +13278,7 @@ def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? 
S0[31 : 16].u16 : S1[31 : 16].u16; @@ -12822,7 +13295,7 @@ def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); @@ -12840,7 +13313,7 @@ def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; @@ -12857,7 +13330,7 @@ def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; @@ -12874,7 +13347,7 @@ def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); @@ -12893,7 +13366,7 @@ def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); @@ -12916,7 +13389,7 @@ def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.u32; # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); @@ -12947,7 +13420,7 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); @@ -12966,7 +13439,7 @@ def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -12983,7 +13456,7 @@ def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -13000,7 +13473,7 @@ def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -13017,7 +13490,7 @@ def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # declare tmp : 32'B; # tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16); # tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16); @@ -13034,7 +13507,7 @@ def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].bf8); # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].bf8); @@ -13057,7 +13530,7 @@ def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 
32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].fp8); # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].fp8); @@ -13080,7 +13553,7 @@ def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].fp8); # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].fp8); @@ -13103,7 +13576,7 @@ def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter result = {'d0': D0._val, 'scc': scc & 1} return result -def _VOP3POp_V_DOT4_F32_BF8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOP3POp_V_DOT4_F32_BF8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # tmp = S2.f32; # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].bf8); # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].bf8); @@ -13158,7 +13631,7 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT4_F32_BF8_BF8: _VOP3POp_V_DOT4_F32_BF8_BF8, } -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 < S1.f16; # // D0 = VCC in VOPC encoding. @@ -13167,6 +13640,7 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- @@ -13174,9 +13648,11 @@ def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f16 == S1.f16; # // D0 = VCC in VOPC encoding. 
@@ -13185,6 +13661,7 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- @@ -13192,9 +13669,11 @@ def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13202,6 +13681,7 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- @@ -13209,9 +13689,11 @@ def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f16 > S1.f16; # // D0 = VCC in VOPC encoding. @@ -13220,6 +13702,7 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- @@ -13227,9 +13710,11 @@ def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 <> S1.f16; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13237,6 +13722,7 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- @@ -13244,9 +13730,11 @@ def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f16 >= S1.f16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13254,6 +13742,7 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- @@ -13261,9 +13750,11 @@ def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. @@ -13272,6 +13763,7 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- @@ -13279,9 +13771,11 @@ def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); # // D0 = VCC in VOPC encoding. 
@@ -13290,6 +13784,7 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- @@ -13297,9 +13792,11 @@ def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -13308,6 +13805,7 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- @@ -13315,9 +13813,11 @@ def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13326,6 +13826,7 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- @@ -13333,9 +13834,11 @@ def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. 
# D0.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= @@ -13345,6 +13848,7 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- @@ -13352,9 +13856,11 @@ def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13363,6 +13869,7 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- @@ -13370,9 +13877,11 @@ def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != @@ -13382,6 +13891,7 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- @@ -13389,9 +13899,11 @@ def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= @@ -13401,6 +13913,7 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- @@ -13408,9 +13921,11 @@ def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 < S1.f32; # // D0 = VCC in VOPC encoding. @@ -13419,6 +13934,7 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- @@ -13426,9 +13942,11 @@ def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f32 == S1.f32; # // D0 = VCC in VOPC encoding. @@ -13437,6 +13955,7 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- @@ -13444,9 +13963,11 @@ def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13454,6 +13975,7 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- @@ -13461,9 +13983,11 @@ def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f32 > S1.f32; # // D0 = VCC in VOPC encoding. @@ -13472,6 +13996,7 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- @@ -13479,9 +14004,11 @@ def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 <> S1.f32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13489,6 +14016,7 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- @@ -13496,9 +14024,11 @@ def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f32 >= S1.f32; # // D0 = VCC in VOPC encoding. 
S0 = Reg(s0) @@ -13506,6 +14036,7 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- @@ -13513,9 +14044,11 @@ def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13524,6 +14057,7 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13531,9 +14065,11 @@ def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); # // D0 = VCC in VOPC encoding. @@ -13542,6 +14078,7 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- @@ -13549,9 +14086,11 @@ def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. 
@@ -13560,6 +14099,7 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- @@ -13567,9 +14107,11 @@ def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. @@ -13578,6 +14120,7 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- @@ -13585,9 +14128,11 @@ def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= @@ -13597,6 +14142,7 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- @@ -13604,9 +14150,11 @@ def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. 
@@ -13615,6 +14163,7 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- @@ -13622,9 +14171,11 @@ def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != @@ -13634,6 +14185,7 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- @@ -13641,9 +14193,11 @@ def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= @@ -13653,6 +14207,7 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- @@ -13660,9 +14215,11 @@ def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 < S1.f64; # // D0 = VCC in VOPC encoding. 
@@ -13671,6 +14228,7 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- @@ -13678,9 +14236,11 @@ def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.f64 == S1.f64; # // D0 = VCC in VOPC encoding. @@ -13689,6 +14249,7 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- @@ -13696,9 +14257,11 @@ def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13706,6 +14269,7 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- @@ -13713,9 +14277,11 @@ def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.f64 > S1.f64; # // D0 = VCC in VOPC encoding. 
@@ -13724,6 +14290,7 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- @@ -13731,9 +14298,11 @@ def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 <> S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13741,6 +14310,7 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- @@ -13748,9 +14318,11 @@ def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.f64 >= S1.f64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13758,6 +14330,7 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- @@ -13765,9 +14338,11 @@ def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. 
@@ -13776,6 +14351,7 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- @@ -13783,9 +14359,11 @@ def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); # // D0 = VCC in VOPC encoding. @@ -13794,6 +14372,7 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- @@ -13801,9 +14380,11 @@ def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < # // D0 = VCC in VOPC encoding. @@ -13812,6 +14393,7 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- @@ -13819,9 +14401,11 @@ def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == # // D0 = VCC in VOPC encoding. 
@@ -13830,6 +14414,7 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- @@ -13837,9 +14422,11 @@ def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # VCC or a scalar register. # D0.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= @@ -13849,6 +14436,7 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- @@ -13856,9 +14444,11 @@ def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > # // D0 = VCC in VOPC encoding. @@ -13867,6 +14457,7 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- @@ -13874,9 +14465,11 @@ def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC # D0.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != @@ -13886,6 +14479,7 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- @@ -13893,9 +14487,11 @@ def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC # D0.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= @@ -13905,6 +14501,7 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- @@ -13912,9 +14509,11 @@ def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 < S1.i16; # // D0 = VCC in VOPC encoding. @@ -13923,6 +14522,7 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- @@ -13930,9 +14530,11 @@ def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i16 == S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -13941,6 +14543,7 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- @@ -13948,9 +14551,11 @@ def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 <= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -13958,6 +14563,7 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- @@ -13965,9 +14571,11 @@ def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 > S1.i16; # // D0 = VCC in VOPC encoding. @@ -13976,6 +14584,7 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- @@ -13983,9 +14592,11 @@ def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i16 <> S1.i16; # // D0 = VCC in VOPC encoding. 
@@ -13994,6 +14605,7 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- @@ -14001,9 +14613,11 @@ def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i16 >= S1.i16; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14011,6 +14625,7 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- @@ -14018,9 +14633,11 @@ def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 < S1.u16; # // D0 = VCC in VOPC encoding. @@ -14029,6 +14646,7 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- @@ -14036,9 +14654,11 @@ def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u16 == S1.u16; # // D0 = VCC in VOPC encoding. 
@@ -14047,6 +14667,7 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u16 == S1.u16
   # --- end pseudocode ---
@@ -14054,9 +14675,11 @@ def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.u16 <= S1.u16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -14064,6 +14687,7 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u16 <= S1.u16
   # --- end pseudocode ---
@@ -14071,9 +14695,11 @@ def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.u16 > S1.u16;
   # // D0 = VCC in VOPC encoding.
@@ -14082,6 +14708,7 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u16 > S1.u16
   # --- end pseudocode ---
@@ -14089,9 +14716,11 @@ def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = S0.u16 <> S1.u16;
   # // D0 = VCC in VOPC encoding.
@@ -14100,6 +14729,7 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u16 != S1.u16
   # --- end pseudocode ---
@@ -14107,9 +14737,11 @@ def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.u16 >= S1.u16;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -14117,6 +14749,7 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u16 >= S1.u16
   # --- end pseudocode ---
@@ -14124,9 +14757,11 @@ def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.i32 < S1.i32;
   # // D0 = VCC in VOPC encoding.
@@ -14135,6 +14770,7 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.i32 < S1.i32
   # --- end pseudocode ---
@@ -14142,9 +14778,11 @@ def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.i32 == S1.i32;
   # // D0 = VCC in VOPC encoding.
@@ -14153,6 +14791,7 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.i32 == S1.i32
   # --- end pseudocode ---
@@ -14160,9 +14799,11 @@ def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.i32 <= S1.i32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -14170,6 +14811,7 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.i32 <= S1.i32
   # --- end pseudocode ---
@@ -14177,9 +14819,11 @@ def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
   # D0.u64[laneId] = S0.i32 > S1.i32;
   # // D0 = VCC in VOPC encoding.
@@ -14188,6 +14832,7 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.i32 > S1.i32
   # --- end pseudocode ---
@@ -14195,9 +14840,11 @@ def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
   # D0.u64[laneId] = S0.i32 <> S1.i32;
   # // D0 = VCC in VOPC encoding.
@@ -14206,6 +14853,7 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.i32 != S1.i32
   # --- end pseudocode ---
@@ -14213,9 +14861,11 @@ def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u64[laneId] = S0.i32 >= S1.i32;
   # // D0 = VCC in VOPC encoding.
   S0 = Reg(s0)
@@ -14223,6 +14873,7 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.i32 >= S1.i32
   # --- end pseudocode ---
@@ -14230,9 +14881,11 @@ def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.u32 < S1.u32;
   # // D0 = VCC in VOPC encoding.
@@ -14241,6 +14894,7 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   D0 = Reg(d0)
   VCC = Reg(vcc)
   laneId = lane
+  PC = Reg(pc)
   # --- compiled pseudocode ---
   D0.u64[laneId] = S0.u32 < S1.u32
   # --- end pseudocode ---
@@ -14248,9 +14902,11 @@ def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
   result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
+  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
+  result['new_pc'] = _pc # absolute byte address
   return result
 
-def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):
+def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
   # D0.u64[laneId] = S0.u32 == S1.u32;
   # // D0 = VCC in VOPC encoding.
@@ -14259,6 +14915,7 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- @@ -14266,9 +14923,11 @@ def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 <= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14276,6 +14935,7 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- @@ -14283,9 +14943,11 @@ def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 > S1.u32; # // D0 = VCC in VOPC encoding. @@ -14294,6 +14956,7 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- @@ -14301,9 +14964,11 @@ def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u32 <> S1.u32; # // D0 = VCC in VOPC encoding. 
@@ -14312,6 +14977,7 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- @@ -14319,9 +14985,11 @@ def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u32 >= S1.u32; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14329,6 +14997,7 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- @@ -14336,9 +15005,11 @@ def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 < S1.i64; # // D0 = VCC in VOPC encoding. @@ -14347,6 +15018,7 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- @@ -14354,9 +15026,11 @@ def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.i64 == S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -14365,6 +15039,7 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- @@ -14372,9 +15047,11 @@ def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 <= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14382,6 +15059,7 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- @@ -14389,9 +15067,11 @@ def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 > S1.i64; # // D0 = VCC in VOPC encoding. @@ -14400,6 +15080,7 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- @@ -14407,9 +15088,11 @@ def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.i64 <> S1.i64; # // D0 = VCC in VOPC encoding. 
@@ -14418,6 +15101,7 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- @@ -14425,9 +15109,11 @@ def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.i64 >= S1.i64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14435,6 +15121,7 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- @@ -14442,9 +15129,11 @@ def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 < S1.u64; # // D0 = VCC in VOPC encoding. @@ -14453,6 +15142,7 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- @@ -14460,9 +15150,11 @@ def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a # D0.u64[laneId] = S0.u64 == S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14471,6 +15163,7 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- @@ -14478,9 +15171,11 @@ def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 <= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14488,6 +15183,7 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- @@ -14495,9 +15191,11 @@ def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 > S1.u64; # // D0 = VCC in VOPC encoding. @@ -14506,6 +15204,7 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- @@ -14513,9 +15212,11 @@ def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC # D0.u64[laneId] = S0.u64 <> S1.u64; # // D0 = VCC in VOPC encoding. 
@@ -14524,6 +15225,7 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- @@ -14531,9 +15233,11 @@ def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u64[laneId] = S0.u64 >= S1.u64; # // D0 = VCC in VOPC encoding. S0 = Reg(s0) @@ -14541,6 +15245,7 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- D0.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- @@ -14548,9 +15253,11 @@ def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14587,6 +15294,7 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f16)): result = S1.u32[0] @@ -14606,9 +15314,11 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. 
@@ -14645,6 +15355,7 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(F(S0.f32)): result = S1.u32[0] @@ -14664,9 +15375,11 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. @@ -14703,6 +15416,7 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0 = Reg(d0) VCC = Reg(vcc) laneId = lane + PC = Reg(pc) # --- compiled pseudocode --- if isSignalNAN(S0.f64): result = S1.u32[0] @@ -14722,9 +15436,11 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True + _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 + result['new_pc'] = _pc # absolute byte address return result -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 < S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14737,7 +15453,7 @@ def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f16 == S1.f16 S0 = Reg(s0) @@ -14751,7 +15467,7 @@ def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14764,7 +15480,7 @@ def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 > S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14777,7 +15493,7 @@ def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 <> S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14790,7 +15506,7 @@ def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f16 >= S1.f16 S0 = Reg(s0) S1 = Reg(s1) @@ -14803,7 +15519,7 @@ def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14816,7 +15532,7 @@ def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) S0 = Reg(s0) S1 = Reg(s1) @@ -14829,7 +15545,7 @@ def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -14843,7 +15559,7 @@ def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -14857,7 +15573,7 @@ def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 > S1.f16); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -14871,7 +15587,7 @@ def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -14885,7 +15601,7 @@ def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 == S1.f16); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -14899,7 +15615,7 @@ def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f16 < S1.f16); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -14913,7 +15629,7 @@ def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 < S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14926,7 +15642,7 @@ def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f32 == S1.f32 S0 = Reg(s0) @@ -14940,7 +15656,7 @@ def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14953,7 +15669,7 @@ def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 > S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14966,7 +15682,7 @@ def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 <> S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14979,7 +15695,7 @@ def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f32 >= S1.f32 S0 = Reg(s0) S1 = Reg(s1) @@ -14992,7 +15708,7 @@ def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -15005,7 +15721,7 @@ def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) S0 = Reg(s0) S1 = Reg(s1) @@ -15018,7 +15734,7 @@ def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -15032,7 +15748,7 @@ def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -15046,7 +15762,7 @@ def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 > S1.f32); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -15060,7 +15776,7 @@ def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -15074,7 +15790,7 @@ def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 == S1.f32); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -15088,7 +15804,7 @@ def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f32 < S1.f32); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -15102,7 +15818,7 @@ def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 < S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15115,7 +15831,7 @@ def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.f64 == S1.f64 S0 = Reg(s0) @@ -15129,7 +15845,7 @@ def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15142,7 +15858,7 @@ def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 > S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15155,7 +15871,7 @@ def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 <> S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15168,7 +15884,7 @@ def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.f64 >= S1.f64 S0 = Reg(s0) S1 = Reg(s1) @@ -15181,7 +15897,7 @@ def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15194,7 +15910,7 @@ def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) S0 = Reg(s0) S1 = Reg(s1) @@ -15207,7 +15923,7 @@ def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); # // With NAN inputs this is not the same operation as < S0 = Reg(s0) @@ -15221,7 +15937,7 @@ def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V 
result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); # // With NAN inputs this is not the same operation as == S0 = Reg(s0) @@ -15235,7 +15951,7 @@ def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 > S1.f64); # // With NAN inputs this is not the same operation as <= S0 = Reg(s0) @@ -15249,7 +15965,7 @@ def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); # // With NAN inputs this is not the same operation as > S0 = Reg(s0) @@ -15263,7 +15979,7 @@ def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 == S1.f64); # // With NAN inputs this is not the same operation as != S0 = Reg(s0) @@ -15277,7 +15993,7 @@ def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = !(S0.f64 < S1.f64); # // With NAN inputs this is not the same operation as >= S0 = Reg(s0) @@ -15291,7 +16007,7 @@ def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 < S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15304,7 +16020,7 @@ def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i16 == S1.i16 S0 = Reg(s0) @@ -15318,7 +16034,7 @@ def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15331,7 +16047,7 @@ def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 > S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15344,7 +16060,7 @@ def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 <> S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15357,7 +16073,7 @@ def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i16 >= S1.i16 S0 = Reg(s0) S1 = Reg(s1) @@ -15370,7 +16086,7 @@ def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 < S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15383,7 +16099,7 @@ def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u16 == S1.u16 S0 = Reg(s0) @@ -15397,7 +16113,7 @@ def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15410,7 +16126,7 @@ def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 > S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15423,7 +16139,7 @@ def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 <> S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15436,7 +16152,7 @@ def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u16 >= S1.u16 S0 = Reg(s0) S1 = Reg(s1) @@ -15449,7 +16165,7 @@ def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 < S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15462,7 +16178,7 @@ def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i32 == S1.i32 S0 = Reg(s0) @@ -15476,7 +16192,7 @@ def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15489,7 +16205,7 @@ def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 > S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15502,7 +16218,7 @@ def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 <> S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15515,7 +16231,7 @@ def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i32 >= S1.i32 S0 = Reg(s0) S1 = Reg(s1) @@ -15528,7 +16244,7 @@ def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 < S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15541,7 +16257,7 @@ def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u32 == S1.u32 S0 = Reg(s0) @@ -15555,7 +16271,7 @@ def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15568,7 +16284,7 @@ def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 > S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15581,7 +16297,7 @@ def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 <> S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15594,7 +16310,7 @@ def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u32 >= S1.u32 S0 = Reg(s0) S1 = Reg(s1) @@ -15607,7 +16323,7 @@ def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 < S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15620,7 +16336,7 @@ def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.i64 == S1.i64 S0 = Reg(s0) @@ -15634,7 +16350,7 @@ def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15647,7 +16363,7 @@ def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 > S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15660,7 +16376,7 @@ def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 <> S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15673,7 +16389,7 @@ def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.i64 >= S1.i64 S0 = Reg(s0) S1 = Reg(s1) @@ -15686,7 +16402,7 @@ def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 < S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15699,7 +16415,7 @@ def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC # EXEC.u64[laneId] = S0.u64 == S1.u64 S0 = Reg(s0) @@ -15713,7 +16429,7 @@ def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15726,7 +16442,7 @@ def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 > S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15739,7 +16455,7 @@ def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 <> S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15752,7 +16468,7 @@ def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # EXEC.u64[laneId] = S0.u64 >= S1.u64 S0 = Reg(s0) S1 = Reg(s1) @@ -15765,7 +16481,7 @@ def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -15818,7 +16534,7 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. @@ -15871,7 +16587,7 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, result['exec_lane'] = (EXEC._val >> lane) & 1 return result -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0): +def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # S1.u[0] value is a signaling NAN. # S1.u[1] value is a quiet NAN. # S1.u[2] value is negative infinity. 
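Every V_CMPX handler above follows the same contract: rebuild EXEC from the incoming exec_mask, set the current lane's bit from the comparison, and hand that bit back as result['exec_lane']. A minimal sketch of a wave-level driver that folds those per-lane bits back into EXEC (an illustration of the contract only, not emu.py's actual loop; handler, s0_per_lane, s1_per_lane and n_lanes are made-up names):

def apply_cmpx(handler, s0_per_lane, s1_per_lane, exec_mask, n_lanes=32):
  # handler is any _VOPCOp_V_CMPX_* function from the hunks above
  new_exec = exec_mask
  for lane in range(n_lanes):
    if not (exec_mask >> lane) & 1: continue  # assumption: inactive lanes don't compare
    r = handler(s0_per_lane[lane], s1_per_lane[lane], 0, 0, 0, 0, lane, exec_mask, 0, None, {}, pc=0)
    new_exec = (new_exec & ~(1 << lane)) | (r['exec_lane'] << lane)
  return new_exec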
diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py
index dbbd33b820..7e9dbd014b 100644
--- a/extra/assembly/amd/emu.py
+++ b/extra/assembly/amd/emu.py
@@ -205,21 +205,11 @@ def exec_scalar(st: WaveState, inst: Inst) -> int:
   compiled = _get_compiled()
   inst_type = type(inst)
-  # SOPP: control flow (not ALU)
+  # SOPP: special cases for control flow that has no pseudocode
   if inst_type is SOPP:
     op = inst.op
     if op == SOPPOp.S_ENDPGM: return -1
     if op == SOPPOp.S_BARRIER: return -2
-    if op == SOPPOp.S_BRANCH: return _sext(inst.simm16, 16)
-    if op == SOPPOp.S_CBRANCH_SCC0: return _sext(inst.simm16, 16) if st.scc == 0 else 0
-    if op == SOPPOp.S_CBRANCH_SCC1: return _sext(inst.simm16, 16) if st.scc == 1 else 0
-    if op == SOPPOp.S_CBRANCH_VCCZ: return _sext(inst.simm16, 16) if (st.vcc & 0xffffffff) == 0 else 0
-    if op == SOPPOp.S_CBRANCH_VCCNZ: return _sext(inst.simm16, 16) if (st.vcc & 0xffffffff) != 0 else 0
-    if op == SOPPOp.S_CBRANCH_EXECZ: return _sext(inst.simm16, 16) if st.exec_mask == 0 else 0
-    if op == SOPPOp.S_CBRANCH_EXECNZ: return _sext(inst.simm16, 16) if st.exec_mask != 0 else 0
-    # Valid SOPP range is 0-61 (max defined opcode); anything above is invalid
-    if op > 61: raise NotImplementedError(f"Invalid SOPP opcode {op}")
-    return 0  # waits, hints, nops
   # SMEM: memory loads (not ALU)
   if inst_type is SMEM:
@@ -229,46 +219,39 @@
     for i in range(cnt): st.wsgpr(inst.sdata + i, mem_read((addr + i * 4) & 0xffffffffffffffff, 4))
     return 0
-  # SOP1: special handling for ops not in pseudocode
-  if inst_type is SOP1:
-    op = SOP1Op(inst.op)
-    # S_GETPC_B64: Get program counter (PC is stored as byte offset, convert from words)
-    if op == SOP1Op.S_GETPC_B64:
-      pc_bytes = st.pc * 4  # PC is in words, convert to bytes
-      st.wsgpr64(inst.sdst, pc_bytes)
-      return 0
-    # S_SETPC_B64: Set program counter to source value (indirect jump)
-    # Returns delta such that st.pc + inst_words + delta = target_words
-    if op == SOP1Op.S_SETPC_B64:
-      target_bytes = st.rsrc64(inst.ssrc0, 0)
-      target_words = target_bytes // 4
-      inst_words = 1  # SOP1 is always 1 word
-      return target_words - st.pc - inst_words
-
   # Get op enum and lookup compiled function
   if inst_type is SOP1: op_cls, ssrc0, sdst = SOP1Op, inst.ssrc0, inst.sdst
   elif inst_type is SOP2: op_cls, ssrc0, sdst = SOP2Op, inst.ssrc0, inst.sdst
   elif inst_type is SOPC: op_cls, ssrc0, sdst = SOPCOp, inst.ssrc0, None
   elif inst_type is SOPK: op_cls, ssrc0, sdst = SOPKOp, inst.sdst, inst.sdst  # sdst is both src and dst
+  elif inst_type is SOPP: op_cls, ssrc0, sdst = SOPPOp, None, None
   else: raise NotImplementedError(f"Unknown scalar type {inst_type}")
-  op = op_cls(inst.op)
+  # SOPP has gaps in the opcode enum - treat unknown opcodes as no-ops
+  try: op = op_cls(inst.op)
+  except ValueError:
+    if inst_type is SOPP: return 0
+    raise
   fn = compiled.get(op_cls, {}).get(op)
-  if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode")
+  if fn is None:
+    # SOPP instructions without pseudocode (waits, hints, nops) are no-ops
+    if inst_type is SOPP: return 0
+    raise NotImplementedError(f"{op.name} not in pseudocode")
   # Build context - handle 64-bit ops that need 64-bit source reads
   # 64-bit source ops: name ends with _B64, _I64, _U64 or contains _U64, _I64 before last underscore
   is_64bit_s0 = op.name.endswith(('_B64', '_I64', '_U64')) or '_U64_' in op.name or '_I64_' in op.name
   is_64bit_s0s1 = op_cls is SOPCOp and op in (SOPCOp.S_CMP_EQ_U64, SOPCOp.S_CMP_LG_U64)
-  s0 = st.rsrc64(ssrc0, 0) if is_64bit_s0 or is_64bit_s0s1 else (st.rsrc(ssrc0, 0) if inst_type != SOPK else st.rsgpr(inst.sdst))
+  s0 = st.rsrc64(ssrc0, 0) if is_64bit_s0 or is_64bit_s0s1 else (st.rsrc(ssrc0, 0) if inst_type not in (SOPK, SOPP) else (st.rsgpr(inst.sdst) if inst_type is SOPK else 0))
   is_64bit_sop2 = is_64bit_s0 and inst_type is SOP2
   s1 = st.rsrc64(inst.ssrc1, 0) if (is_64bit_sop2 or is_64bit_s0s1) else (st.rsrc(inst.ssrc1, 0) if inst_type in (SOP2, SOPC) else inst.simm16 if inst_type is SOPK else 0)
   d0 = st.rsgpr64(sdst) if (is_64bit_s0 or is_64bit_s0s1) and sdst is not None else (st.rsgpr(sdst) if sdst is not None else 0)
   exec_mask = st.exec_mask
-  literal = inst.simm16 if inst_type is SOPK else st.literal
+  literal = inst.simm16 if inst_type in (SOPK, SOPP) else st.literal
 
-  # Execute compiled function
-  result = fn(s0, s1, 0, d0, st.scc, st.vcc, 0, exec_mask, literal, None, {})
+  # Execute compiled function - pass PC in bytes for instructions that need it
+  pc_bytes = st.pc * 4
+  result = fn(s0, s1, 0, d0, st.scc, st.vcc, 0, exec_mask, literal, None, {}, pc=pc_bytes)
 
   # Apply results
   if sdst is not None:
@@ -278,7 +261,11 @@
     st.wsgpr(sdst, result['d0'])
   if 'scc' in result: st.scc = result['scc']
   if 'exec' in result: st.exec_mask = result['exec']
-  if 'pc_delta' in result: return result['pc_delta']
+  if 'new_pc' in result:
+    # Convert absolute byte address to word delta
+    # new_pc is where we want to go, st.pc is current position, inst._words will be added after
+    new_pc_words = result['new_pc'] // 4
+    return new_pc_words - st.pc - 1  # -1 because emulator adds inst_words (1 for scalar)
   return 0
 
 def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = None) -> None:
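To see the new_pc plumbing end to end, here is a worked check of the delta math above, assuming the ISA's PC-relative branch semantics PC = PC + signext(SIMM16 * 4) + 4 (the numbers are made up):

pc_words, simm16 = 10, 3            # wave at word 10, branch +3 words
pc_bytes = pc_words * 4             # what exec_scalar passes in: pc=40
new_pc = pc_bytes + simm16 * 4 + 4  # what the compiled pseudocode returns: 56
delta = new_pc // 4 - pc_words - 1  # what exec_scalar returns: 3
assert pc_words + 1 + delta == new_pc // 4  # main loop: pc += inst_words + delta

A taken short branch therefore yields exactly the old _sext(simm16, 16) delta, so the pseudocode path matches the removed special cases.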
@@ -402,20 +389,6 @@
     op_cls, op, src0, src1, src2, vdst = VOPCOp, VOPCOp(inst.op), inst.src0, inst.src1, None, inst.vdst
   else: op_cls, op, src0, src1, src2, vdst = VOP3Op, VOP3Op(inst.op), inst.src0, inst.src1, inst.src2, inst.vdst
-    # V_PERM_B32: byte permutation - not in pseudocode PDF, implement directly
-    # D0[byte_i] = selector[byte_i] < 8 ? {src0, src1}[selector[byte_i]] : (selector[byte_i] >= 0xD ? 0xFF : 0x00)
-    if op == VOP3Op.V_PERM_B32:
-      s0, s1, s2 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane), st.rsrc(inst.src2, lane)
-      # Combine src1 and src0 into 8-byte value: src1 is bytes 0-3, src0 is bytes 4-7
-      combined = (s1 & 0xffffffff) | ((s0 & 0xffffffff) << 32)
-      result = 0
-      for i in range(4):  # 4 result bytes
-        sel = (s2 >> (i * 8)) & 0xff  # byte selector for this position
-        if sel <= 7: result |= (((combined >> (sel * 8)) & 0xff) << (i * 8))  # select byte from combined
-        elif sel >= 0xd: result |= (0xff << (i * 8))  # 0xD-0xF: constant 0xFF
-        # else 0x8-0xC: constant 0x00 (already 0)
-      V[vdst] = result & 0xffffffff
-      return
   elif inst_type is VOPC:
     op = VOPCOp(inst.op)
     # For 16-bit VOPC, vsrc1 uses same encoding as VOP2 16-bit: bit 7 selects hi(1) or lo(0) half
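The removed fast path above only knew selectors 0-7 and 0xC-0xF; the pseudocode path that replaces it (via the lambda tracking added to pcode.py below for definitions like BYTE_PERMUTE) also covers the sign-replication selectors 8-11 that the new test exercises. A reference model for sanity checking, assuming the selector table from the ISA docs; this is a sketch, not the emulator's code path:

def v_perm_b32_ref(s0: int, s1: int, sel: int) -> int:
  # S1 supplies bytes 0-3, S0 bytes 4-7, exactly as the removed code combined them
  data = (s1 & 0xffffffff) | ((s0 & 0xffffffff) << 32)
  out = 0
  for i in range(4):
    b = (sel >> (8 * i)) & 0xff
    if b <= 7: byte = (data >> (8 * b)) & 0xff                        # pick a byte
    elif b <= 0xb: byte = 0xff * ((data >> (16 * (b - 8) + 15)) & 1)  # replicate sign of byte 1/3/5/7
    elif b >= 0xd: byte = 0xff                                        # 0xD-0xF: constant 0xFF
    else: byte = 0x00                                                 # 0xC: constant 0x00
    out |= byte << (8 * i)
  return out

assert v_perm_b32_ref(0x00008000, 0x80000080, 0x08090A0B) == 0x00FFFF00  # the new test's case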
diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py
index 05b23d7528..c230384324 100644
--- a/extra/assembly/amd/pcode.py
+++ b/extra/assembly/amd/pcode.py
@@ -341,7 +341,7 @@ def F(x):
   if isinstance(x, int): return _f32(x)  # int -> interpret as f32 bits
   if isinstance(x, TypedView): return x  # preserve TypedView for bit-pattern checks
   return float(x)  # already a float or float-like
-signext = lambda x: x
+signext = lambda x: int(x)  # sign-extend to full width - already handled by Python's arbitrary precision ints
 pack = lambda hi, lo: ((int(hi) & 0xffff) << 16) | (int(lo) & 0xffff)
 pack32 = lambda hi, lo: ((int(hi) & 0xffffffff) << 32) | (int(lo) & 0xffffffff)
 _pack, _pack32 = pack, pack32  # Aliases for internal use
@@ -519,6 +519,17 @@ class TypedView:
   def __bool__(s):
     return bool(int(s))
 
+  # Allow chained type access like jump_addr.i64 when jump_addr is already a TypedView
+  # These just return self or convert appropriately
+  @property
+  def i64(s): return s if s._bits == 64 and s._signed else int(s)
+  @property
+  def u64(s): return s if s._bits == 64 and not s._signed else int(s) & MASK64
+  @property
+  def i32(s): return s if s._bits == 32 and s._signed else _sext(int(s) & MASK32, 32)
+  @property
+  def u32(s): return s if s._bits == 32 and not s._signed else int(s) & MASK32
+
 class Reg:
   """GPU register: D0.f32 = S0.f32 + S1.f32 just works."""
   __slots__ = ('_val',)
@@ -542,6 +553,7 @@ class Reg:
   bf16 = property(lambda s: TypedView(s, 16, is_float=True, is_bf16=True), lambda s, v: setattr(s, '_val', (s._val & 0xffff0000) | ((v if isinstance(v, int) else _ibf16(float(v))) & 0xffff)))
   u8 = property(lambda s: TypedView(s, 8))
   i8 = property(lambda s: TypedView(s, 8, signed=True))
+  u1 = property(lambda s: TypedView(s, 1))  # single bit
 
   def __getitem__(s, key):
     if isinstance(key, slice): return SliceProxy(s, int(key.start), int(key.stop))
@@ -664,7 +676,7 @@ def compile_pseudocode(pseudocode: str) -> str:
 
 def _assign(lhs: str, rhs: str) -> str:
   """Generate assignment. Bare tmp/SCC/etc get wrapped in Reg()."""
-  if lhs in ('tmp', 'SCC', 'VCC', 'EXEC', 'D0', 'D1', 'saveexec'):
+  if lhs in ('tmp', 'SCC', 'VCC', 'EXEC', 'D0', 'D1', 'saveexec', 'PC'):
     return f"{lhs} = Reg({rhs})"
   return f"{lhs} = {rhs}"
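The chained-access properties matter because extracted pseudocode can re-annotate an already-typed value, e.g. jump_addr = S0.i64 followed by a later jump_addr.i64. A minimal illustration, assuming Reg exposes the same .i64/.u32 views the generated handlers rely on:

from extra.assembly.amd.pcode import Reg

addr = Reg(0xfffffffffffffff0).i64  # TypedView: 64-bit signed, as from S0.i64
assert addr.i64 is addr             # re-annotating with the matching type is a no-op
assert int(addr) == -16             # signed view of a backward jump target
assert int(Reg(0xdeadbeef).u32.u64) == 0xdeadbeef  # mismatched widths convert through int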
@@ -801,14 +813,14 @@ INST_PATTERN = re.compile(r'^([SV]_[A-Z0-9_]+)\s+(\d+)\s*$', re.M)
 
 # Patterns that can't be handled by the DSL (require special handling in emu.py)
 UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS',
-               'PC =', 'PC=', 'PC+', '= PC', 'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt',
+               'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt',
               'CVT_OFF_TABLE', 'ThreadMask', 'S1[i', 'C.i32', 'S[i]', 'in[', '2.0 / PI',
               'if n.', 'DST.u32', 'addrd = DST', 'addr = DST']  # Malformed pseudocode from PDF
 
 def extract_pseudocode(text: str) -> str | None:
   """Extract pseudocode from an instruction description snippet."""
-  lines, result, depth = text.split('\n'), [], 0
+  lines, result, depth, in_lambda = text.split('\n'), [], 0, 0
   for line in lines:
     s = line.strip()
     if not s: continue
@@ -817,12 +829,17 @@
     # Skip document headers (RDNA or CDNA)
     if s.startswith('"RDNA') or s.startswith('AMD ') or s.startswith('CDNA'): continue
     if s.startswith('Notes') or s.startswith('Functional examples'): break
+    # Track lambda definitions (e.g., BYTE_PERMUTE = lambda(data, sel) (...))
+    if '= lambda(' in s: in_lambda += 1; continue
+    if in_lambda > 0:
+      if s.endswith(');'): in_lambda -= 1
+      continue
     if s.startswith('if '): depth += 1
     elif s.startswith('endif'): depth = max(0, depth - 1)
     if s.endswith('.') and not any(p in s for p in ['D0', 'D1', 'S0', 'S1', 'S2', 'SCC', 'VCC', 'tmp', '=']): continue
     if re.match(r'^[a-z].*\.$', s) and '=' not in s: continue
     is_code = (
-      any(p in s for p in ['D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =']) or
+      any(p in s for p in ['D0.', 'D1.', 'S0.', 'S1.', 'S2.', 'SCC =', 'SCC ?', 'VCC', 'EXEC', 'tmp =', 'tmp[', 'lane =', 'PC =']) or
       any(p in s for p in ['D0[', 'D1[', 'S0[', 'S1[', 'S2[']) or
       s.startswith(('if ', 'else', 'elsif', 'endif', 'declare ', 'for ', 'endfor', '//')) or
       re.match(r'^[a-z_]+\s*=', s) or re.match(r'^[a-z_]+\[', s) or
       (depth > 0 and '=' in s)
     )
@@ -1043,10 +1060,12 @@ from extra.assembly.amd.pcode import *
   is_div_scale = 'DIV_SCALE' in op.name
   # VOP3SD instructions that write VCC per-lane (either via VCC.u64[laneId] or by setting VCC = 0/1)
   has_sdst = cls_name == 'VOP3SDOp' and ('VCC.u64[laneId]' in pc or is_div_scale)
+  # Instructions that use/modify PC
+  has_pc = 'PC' in pc
 
   # Generate function with indented body
   fn_name = f"_{cls_name}_{op.name}"
-  lines.append(f"def {fn_name}(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0):")
+  lines.append(f"def {fn_name}(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):")
   # Add original pseudocode as comment
   for pc_line in pc.split('\n'):
     lines.append(f"  # {pc_line}")
@@ -1057,14 +1076,21 @@
           ('SCC', 'Reg(scc)'), ('VCC', 'Reg(vcc)'), ('EXEC', 'Reg(exec_mask)'),
           ('tmp', 'Reg(0)'), ('saveexec', 'Reg(exec_mask)'), ('laneId', 'lane'),
           ('SIMM16', 'Reg(literal)'), ('SIMM32', 'Reg(literal)'),
-          ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)')]
+          ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)'),
+          ('PC', 'Reg(pc)')]  # PC is passed in as byte address
   used = {name for name, _ in regs if name in combined}
   # EXEC_LO/EXEC_HI need EXEC
   if 'EXEC_LO' in combined or 'EXEC_HI' in combined: used.add('EXEC')
+  # VCCZ/EXECZ need VCC/EXEC
+  if 'VCCZ' in combined: used.add('VCC')
+  if 'EXECZ' in combined: used.add('EXEC')
   for name, init in regs:
     if name in used: lines.append(f"  {name} = {init}")
   if 'EXEC_LO' in combined: lines.append("  EXEC_LO = SliceProxy(EXEC, 31, 0)")
   if 'EXEC_HI' in combined: lines.append("  EXEC_HI = SliceProxy(EXEC, 63, 32)")
+  # VCCZ = 1 if VCC == 0, EXECZ = 1 if EXEC == 0
+  if 'VCCZ' in combined: lines.append("  VCCZ = Reg(1 if VCC._val == 0 else 0)")
+  if 'EXECZ' in combined: lines.append("  EXECZ = Reg(1 if EXEC._val == 0 else 0)")
   # Add compiled pseudocode with markers
   lines.append("  # --- compiled pseudocode ---")
   for line in code.split('\n'):
@@ -1088,6 +1114,11 @@
     lines.append("    result['d0_64'] = True")
   if has_d1:
     lines.append("  result['d1'] = D1._val & 1")
+  if has_pc:
+    # Return new PC as absolute byte address, emulator will compute delta
+    # Handle negative values (backward jumps): PC._val is stored as unsigned, convert to signed
+    lines.append("  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000")
+    lines.append("  result['new_pc'] = _pc  # absolute byte address")
   lines.append("  return result")
   lines.append("")
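Put together, a PC-relative branch now compiles to an ordinary handler instead of a special case in exec_scalar. A hand-abridged sketch of the shape gen_pcode emits for something like S_CBRANCH_VCCZ (not the verbatim generated source, and assuming the ISA's PC = PC + signext(SIMM16 * 4) + 4 branch formula):

def _SOPPOp_S_CBRANCH_VCCZ_sketch(s0, s1, s2, d0, scc, vcc, lane, exec_mask,
                                  literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
  VCCZ = 1 if vcc == 0 else 0                                  # synthesized from VCC, as above
  simm16 = literal - 0x10000 if literal & 0x8000 else literal  # sign-extend SIMM16
  new_pc = pc + simm16 * 4 + 4 if VCCZ else pc + 4             # PC math is in bytes
  return {'scc': scc & 1, 'new_pc': new_pc}                    # exec_scalar turns this into a word delta

With pc = st.pc * 4 going in and new_pc // 4 - st.pc - 1 coming out, a taken branch reduces to the same word delta the deleted S_CBRANCH_* special cases returned.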
diff --git a/extra/assembly/amd/test/test_emu.py b/extra/assembly/amd/test/test_emu.py
index 0c8b0c0517..e9055070ad 100644
--- a/extra/assembly/amd/test/test_emu.py
+++ b/extra/assembly/amd/test/test_emu.py
@@ -2590,6 +2590,30 @@ class TestNewPcodeHelpers(unittest.TestCase):
     # byte 3: sel=0x0C = 12 -> 0x00
     self.assertEqual(result, 0x00FFFFFF, f"Expected 0x00FFFFFF, got 0x{result:08x}")
 
+  def test_v_perm_b32_sign_extend(self):
+    """V_PERM_B32: Test sign extension selectors 8-11."""
+    # Combined = {S0, S1} where S1 is bytes 0-3, S0 is bytes 4-7
+    # s0 = 0x00008000 -> byte 5 (0x80) has sign bit set
+    # s1 = 0x80000080 -> byte 3 (0x80) has the sign bit set, byte 1 (0x00) does not
+    # Combined = 0x00008000_80000080
+    # selector = 0x08090A0B -> sign of bytes 1,3,5,7
+    # byte 0: sel=0x0B -> sign of byte 7 (0x00) -> 0x00
+    # byte 1: sel=0x0A -> sign of byte 5 (0x80) -> 0xFF
+    # byte 2: sel=0x09 -> sign of byte 3 (0x80) -> 0xFF
+    # byte 3: sel=0x08 -> sign of byte 1 (0x00) -> 0x00
+    instructions = [
+      s_mov_b32(s[0], 0x00008000),
+      s_mov_b32(s[1], 0x80000080),
+      s_mov_b32(s[2], 0x08090A0B),
+      v_mov_b32_e32(v[0], s[0]),
+      v_mov_b32_e32(v[1], s[1]),
+      v_mov_b32_e32(v[2], s[2]),
+      v_perm_b32(v[3], v[0], v[1], v[2]),
+    ]
+    st = run_program(instructions, n_lanes=1)
+    result = st.vgpr[0][3]
+    self.assertEqual(result, 0x00FFFF00, f"Expected 0x00FFFF00, got 0x{result:08x}")
+
   def test_v_dot2_f32_bf16_basic(self):
     """V_DOT2_F32_BF16: Dot product of two bf16 pairs accumulated into f32."""
     from extra.assembly.amd.pcode import _ibf16

From 69cdc8066d657c71304086dfc7e43e291ba002a6 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 11:09:51 -0500
Subject: [PATCH 4/8] assembly/amd: add dtype tests to AMD IDE CI (#13899)

* add dtype tests to AMD IDE CI

* more tests

* add trig preop

* regen done

* split to amd autogen

* simpler

---
 .github/workflows/test.yml                    |  21 +-
 extra/assembly/amd/autogen/cdna/gen_pcode.py  |  32 ++
 extra/assembly/amd/autogen/rdna3/gen_pcode.py | 317 ++++++++++++------
 extra/assembly/amd/autogen/rdna4/gen_pcode.py | 275 ++++++++++-----
 extra/assembly/amd/dsl.py                     |   4 +
 extra/assembly/amd/emu.py                     |  31 +-
 extra/assembly/amd/pcode.py                   |  49 ++-
 extra/assembly/amd/test/test_emu.py           | 163 +++++++++
 8 files changed,
701 insertions(+), 191 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 657a85a077..ef2f8ce66b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -654,7 +654,7 @@ jobs: - name: Run process replay tests uses: ./.github/actions/process-replay - testrdna3: + testamdasm: name: AMD ASM IDE runs-on: ubuntu-24.04 timeout-minutes: 10 @@ -679,8 +679,23 @@ jobs: run: python -m pytest -n=auto extra/assembly/amd/ --durations 20 - name: Run RDNA3 emulator tests (AMD_LLVM=1) run: AMD_LLVM=1 python -m pytest -n=auto extra/assembly/amd/ --durations 20 - - name: Install pdfplumber - run: pip install pdfplumber + - name: Run RDNA3 dtype tests + run: PYTHONPATH="." AMD=1 PYTHON_REMU=1 MOCKGPU=1 AMD_LLVM=0 pytest -n=auto test/test_dtype_alu.py test/test_dtype.py + - name: Run RDNA3 dtype tests (AMD_LLVM=1) + run: PYTHONPATH="." AMD=1 PYTHON_REMU=1 MOCKGPU=1 AMD_LLVM=1 pytest -n=auto test/test_dtype_alu.py test/test_dtype.py + + testamdautogen: + name: AMD autogen + runs-on: ubuntu-24.04 + timeout-minutes: 10 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: rdna3-autogen + pydeps: "pdfplumber" - name: Verify AMD autogen is up to date run: | python -m extra.assembly.amd.dsl --arch all diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py index cb2f3e8f06..b5cd12abc0 100644 --- a/extra/assembly/amd/autogen/cdna/gen_pcode.py +++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py @@ -18284,6 +18284,37 @@ def _VOP3AOp_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V result['d0_64'] = True return result +def _VOP3AOp_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # shift = 32'I(S1[4 : 0].u32) * 53; + # if exponent(S0.f64) > 1077 then + # shift += exponent(S0.f64) - 1077 + # endif; + # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} + # // b_1200 is the MSB of the fractional part of 2.0/PI + # // Left shift operation indicates which bits are brought + # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); + # scale = -53 - shift; + # if exponent(S0.f64) >= 1968 then + # scale += 128 + # endif; + # D0.f64 = ldexp(result, scale) + S0 = Reg(s0) + S1 = Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + shift = (S1[4 : 0].u32) * 53 + if exponent(S0.f64) > 1077: + shift += exponent(S0.f64) - 1077 + result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff) + scale = -53 - shift + if exponent(S0.f64) >= 1968: + scale += 128 + D0.f64 = ldexp(result, scale) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) S0 = Reg(s0) @@ -18940,6 +18971,7 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_LSHLREV_B64: _VOP3AOp_V_LSHLREV_B64, VOP3AOp.V_LSHRREV_B64: _VOP3AOp_V_LSHRREV_B64, VOP3AOp.V_ASHRREV_I64: _VOP3AOp_V_ASHRREV_I64, + VOP3AOp.V_TRIG_PREOP_F64: _VOP3AOp_V_TRIG_PREOP_F64, VOP3AOp.V_BFM_B32: _VOP3AOp_V_BFM_B32, VOP3AOp.V_CVT_PKNORM_I16_F32: _VOP3AOp_V_CVT_PKNORM_I16_F32, VOP3AOp.V_CVT_PKNORM_U16_F32: _VOP3AOp_V_CVT_PKNORM_U16_F32, diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py 
index df32416e22..e480cbb93e 100644 --- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py @@ -5497,6 +5497,7 @@ def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5517,6 +5518,7 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5537,6 +5539,7 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5556,6 +5559,7 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5576,6 +5580,7 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5595,6 +5600,7 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5614,6 +5620,7 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5634,6 +5641,7 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # 
absolute byte address @@ -5654,6 +5662,7 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5674,6 +5683,7 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5694,6 +5704,7 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5715,6 +5726,7 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5735,6 +5747,7 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5756,6 +5769,7 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5777,6 +5791,7 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5795,6 +5810,7 @@ def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5813,6 +5829,7 @@ def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5833,6 +5850,7 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5853,6 +5871,7 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5872,6 +5891,7 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5892,6 +5912,7 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5911,6 +5932,7 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5930,6 +5952,7 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5950,6 +5973,7 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5970,6 +5994,7 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val 
>> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -5990,6 +6015,7 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6010,6 +6036,7 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6031,6 +6058,7 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6051,6 +6079,7 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6072,6 +6101,7 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6093,6 +6123,7 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6111,6 +6142,7 @@ def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6129,6 +6161,7 @@ def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 
0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6149,6 +6182,7 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6169,6 +6203,7 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6188,6 +6223,7 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6208,6 +6244,7 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6227,6 +6264,7 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6246,6 +6284,7 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6266,6 +6305,7 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6286,6 +6326,7 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6306,6 +6347,7 @@ 
def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6326,6 +6368,7 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6347,6 +6390,7 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6367,6 +6411,7 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6388,6 +6433,7 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6409,6 +6455,7 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6427,6 +6474,7 @@ def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6447,6 +6495,7 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6467,6 +6516,7 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': 
D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6486,6 +6536,7 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6506,6 +6557,7 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6526,6 +6578,7 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6545,6 +6598,7 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6565,6 +6619,7 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6585,6 +6640,7 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6604,6 +6660,7 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6624,6 +6681,7 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val 
>> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6644,6 +6702,7 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6663,6 +6722,7 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6681,6 +6741,7 @@ def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6701,6 +6762,7 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6721,6 +6783,7 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6740,6 +6803,7 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6760,6 +6824,7 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6780,6 +6845,7 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 
result['new_pc'] = _pc # absolute byte address @@ -6799,6 +6865,7 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6817,6 +6884,7 @@ def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6835,6 +6903,7 @@ def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6855,6 +6924,7 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6875,6 +6945,7 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6894,6 +6965,7 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6914,6 +6986,7 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6934,6 +7007,7 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6953,6 +7027,7 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6971,6 +7046,7 @@ def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -6989,6 +7065,7 @@ def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7009,6 +7086,7 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7029,6 +7107,7 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7048,6 +7127,7 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7068,6 +7148,7 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7088,6 +7169,7 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7107,6 +7189,7 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: 
result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7125,6 +7208,7 @@ def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7143,6 +7227,7 @@ def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7163,6 +7248,7 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7183,6 +7269,7 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7202,6 +7289,7 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7222,6 +7310,7 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7242,6 +7331,7 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7261,6 +7351,7 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = 
PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7279,6 +7370,7 @@ def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7339,6 +7431,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7399,6 +7492,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7459,6 +7553,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 + result['vcc_lane'] = (D0._val >> lane) & 1 result['d0_64'] = True _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 result['new_pc'] = _pc # absolute byte address @@ -7472,7 +7567,7 @@ def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7485,7 +7580,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 < S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7499,7 +7594,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 == S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7512,7 +7607,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 <= S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): @@ -7525,7 +7620,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 > S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7538,7 +7633,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 != S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7551,7 +7646,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f16 >= S1.f16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7564,7 +7659,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7577,7 +7672,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7591,7 +7686,7 @@ def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 >= S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7605,7 +7700,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 != S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7619,7 +7714,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 > S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7633,7 +7728,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 <= S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7647,7 +7742,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 == S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7661,7 +7756,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f16 < S1.f16) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7672,7 +7767,7 @@ def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7683,7 +7778,7 @@ def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7696,7 +7791,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 < S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7710,7 +7805,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 == S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7723,7 +7818,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 <= S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): @@ -7736,7 +7831,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 > S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7749,7 +7844,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 != S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7762,7 +7857,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f32 >= S1.f32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7775,7 +7870,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7788,7 +7883,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7802,7 +7897,7 @@ def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 >= S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7816,7 +7911,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 != S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7830,7 +7925,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 > S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7844,7 +7939,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 <= S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7858,7 +7953,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 == S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7872,7 +7967,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f32 < S1.f32) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7883,7 +7978,7 @@ def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7894,7 +7989,7 @@ def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7907,7 +8002,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 < S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7921,7 +8016,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 == S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7934,7 +8029,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 <= S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): @@ -7947,7 +8042,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 > S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7960,7 +8055,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 != S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7973,7 +8068,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.f64 >= S1.f64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7986,7 +8081,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -7999,7 +8094,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8013,7 +8108,7 @@ def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 >= S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8027,7 +8122,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 != S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8041,7 +8136,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 > S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8055,7 +8150,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 <= S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8069,7 +8164,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 == S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8083,7 +8178,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V EXEC.u64[laneId] = not (S0.f64 < S1.f64) # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8094,7 +8189,7 @@ def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8107,7 +8202,7 @@ def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 < S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8121,7 +8216,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 == S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8134,7 +8229,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 <= S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8147,7 +8242,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 > S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): @@ -8160,7 +8255,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 != S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8173,7 +8268,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i16 >= S1.i16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8186,7 +8281,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 < S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8200,7 +8295,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 == S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8213,7 +8308,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 <= S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8226,7 +8321,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 > S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8239,7 +8334,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 != S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8252,7 +8347,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u16 >= S1.u16 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ 
-8263,7 +8358,7 @@ def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8276,7 +8371,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 < S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8290,7 +8385,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 == S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8303,7 +8398,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 <= S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8316,7 +8411,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 > S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8329,7 +8424,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 != S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8342,7 +8437,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i32 >= S1.i32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8353,7 +8448,7 @@ def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8364,7 +8459,7 @@ def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8377,7 +8472,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 < S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8391,7 +8486,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 == S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8404,7 +8499,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 <= S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8417,7 +8512,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 > S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8430,7 +8525,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 != S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8443,7 +8538,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u32 >= S1.u32 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8454,7 +8549,7 @@ def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8465,7 +8560,7 @@ def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end 
pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8478,7 +8573,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 < S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8492,7 +8587,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 == S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8505,7 +8600,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 <= S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8518,7 +8613,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 > S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8531,7 +8626,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 != S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8544,7 +8639,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.i64 >= S1.i64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8555,7 +8650,7 @@ def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8566,7 +8661,7 @@ def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 0 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != 
exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8579,7 +8674,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 < S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8593,7 +8688,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 == S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8606,7 +8701,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 <= S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8619,7 +8714,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8632,7 +8727,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8645,7 +8740,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8656,7 +8751,7 @@ def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP EXEC.u64[laneId] = 1 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8709,7 +8804,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] 
= (EXEC._val >> lane) & 1
   return result
 
 def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8762,7 +8857,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   EXEC.u64[laneId] = result
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
 def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8815,7 +8910,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   EXEC.u64[laneId] = result
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result
 
 def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -12162,6 +12257,37 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   result = {'d0': D0._val, 'scc': scc & 1}
   return result
 
+def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
+  # shift = 32'I(S1[4 : 0].u32) * 53;
+  # if exponent(S0.f64) > 1077 then
+  #   shift += exponent(S0.f64) - 1077
+  # endif;
+  # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0}
+  # // b_1200 is the MSB of the fractional part of 2.0/PI
+  # // Left shift operation indicates which bits are brought
+  # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff);
+  # scale = -53 - shift;
+  # if exponent(S0.f64) >= 1968 then
+  #   scale += 128
+  # endif;
+  # D0.f64 = ldexp(result, scale)
+  S0 = Reg(s0)
+  S1 = Reg(s1)
+  D0 = Reg(d0)
+  # --- compiled pseudocode ---
+  shift = (S1[4 : 0].u32) * 53
+  if exponent(S0.f64) > 1077:
+    shift += exponent(S0.f64) - 1077
+  result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff)
+  scale = -53 - shift
+  if exponent(S0.f64) >= 1968:
+    scale += 128
+  D0.f64 = ldexp(result, scale)
+  # --- end pseudocode ---
+  result = {'d0': D0._val, 'scc': scc & 1}
+  result['d0_64'] = True
+  return result
+
 def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
   # D0.u16 = (S1.u16 << S0[3 : 0].u32)
   S0 = Reg(s0)
@@ -12695,6 +12821,7 @@ VOP3Op_FUNCTIONS = {
   VOP3Op.V_MUL_LO_U32: _VOP3Op_V_MUL_LO_U32,
   VOP3Op.V_MUL_HI_U32: _VOP3Op_V_MUL_HI_U32,
   VOP3Op.V_MUL_HI_I32: _VOP3Op_V_MUL_HI_I32,
+  VOP3Op.V_TRIG_PREOP_F64: _VOP3Op_V_TRIG_PREOP_F64,
   VOP3Op.V_LSHLREV_B16: _VOP3Op_V_LSHLREV_B16,
   VOP3Op.V_LSHRREV_B16: _VOP3Op_V_LSHRREV_B16,
   VOP3Op.V_ASHRREV_I16: _VOP3Op_V_ASHRREV_I16,
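The V_TRIG_PREOP_F64 handler added above is the Payne-Hanek helper for trig argument reduction: it returns a 53-bit window of 2/pi, taken 53*S1[4:0] bits below the most significant fraction bit and pre-scaled so that the windows tile the constant. A sketch for intuition only, rebuilding TWO_OVER_PI_1201 with mpmath (the emulator ships it as a precomputed constant) and omitting the large-exponent adjustments for huge S0:

import math
from mpmath import mp

mp.prec = 1300  # comfortably more than the 1201 fraction bits we keep
TWO_OVER_PI_1201 = int(mp.floor((2 / mp.pi) * 2**1201))  # top 1201 fraction bits of 2/pi

def trig_preop(segment):
  shift = (segment & 0x1f) * 53  # S1[4:0] selects which 53-bit window
  window = ((TWO_OVER_PI_1201 << shift) >> (1201 - 53)) & ((1 << 53) - 1)
  return math.ldexp(float(window), -53 - shift)  # scale the window back into place

assert abs(trig_preop(0) - 2 / math.pi) < 1e-15  # window 0 is 2/pi truncated to 53 bits
# adjacent windows abut exactly: the first two concatenate to the top 106 bits
w0 = (TWO_OVER_PI_1201 >> (1201 - 53)) & ((1 << 53) - 1)
w1 = ((TWO_OVER_PI_1201 << 53) >> (1201 - 53)) & ((1 << 53) - 1)
assert (w0 << 53) | w1 == TWO_OVER_PI_1201 >> (1201 - 106)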
diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py
index 15a92ee453..70dd62eca1 100644
--- a/extra/assembly/amd/autogen/rdna4/gen_pcode.py
+++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py
@@ -5575,6 +5575,7 @@ def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5595,6 +5596,7 @@ def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5614,6 +5616,7 @@ def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5634,6 +5637,7 @@ def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5653,6 +5657,7 @@ def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5672,6 +5677,7 @@ def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5692,6 +5698,7 @@ def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5712,6 +5719,7 @@ def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5732,6 +5740,7 @@ def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5752,6 +5761,7 @@ def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5773,6 +5783,7 @@ def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5793,6 +5804,7 @@ def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5814,6 +5826,7 @@ def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5835,6 +5848,7 @@ def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5855,6 +5869,7 @@ def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5875,6 +5890,7 @@ def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5894,6 +5910,7 @@ def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5914,6 +5931,7 @@ def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5933,6 +5951,7 @@ def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5952,6 +5971,7 @@ def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5972,6 +5992,7 @@ def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -5992,6 +6013,7 @@ def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6012,6 +6034,7 @@ def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6032,6 +6055,7 @@ def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6053,6 +6077,7 @@ def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6073,6 +6098,7 @@ def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6094,6 +6120,7 @@ def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6115,6 +6142,7 @@ def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6135,6 +6163,7 @@ def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6155,6 +6184,7 @@ def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6174,6 +6204,7 @@ def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6194,6 +6225,7 @@ def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6213,6 +6245,7 @@ def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6232,6 +6265,7 @@ def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6252,6 +6286,7 @@ def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6272,6 +6307,7 @@ def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6292,6 +6328,7 @@ def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6312,6 +6349,7 @@ def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6333,6 +6371,7 @@ def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6353,6 +6392,7 @@ def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6374,6 +6414,7 @@ def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6395,6 +6436,7 @@ def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6415,6 +6457,7 @@ def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6435,6 +6478,7 @@ def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6454,6 +6498,7 @@ def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6474,6 +6519,7 @@ def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6494,6 +6540,7 @@ def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6513,6 +6560,7 @@ def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6533,6 +6581,7 @@ def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6553,6 +6602,7 @@ def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6572,6 +6622,7 @@ def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6592,6 +6643,7 @@ def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6612,6 +6664,7 @@ def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6631,6 +6684,7 @@ def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6651,6 +6705,7 @@ def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6671,6 +6726,7 @@ def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6690,6 +6746,7 @@ def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6710,6 +6767,7 @@ def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6730,6 +6788,7 @@ def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6749,6 +6808,7 @@ def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6769,6 +6829,7 @@ def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6789,6 +6850,7 @@ def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6808,6 +6870,7 @@ def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6828,6 +6891,7 @@ def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6848,6 +6912,7 @@ def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6867,6 +6932,7 @@ def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6887,6 +6953,7 @@ def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6907,6 +6974,7 @@ def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6926,6 +6994,7 @@ def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6946,6 +7015,7 @@ def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6966,6 +7036,7 @@ def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -6985,6 +7056,7 @@ def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7005,6 +7077,7 @@ def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7025,6 +7098,7 @@ def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7044,6 +7118,7 @@ def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7064,6 +7139,7 @@ def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7084,6 +7160,7 @@ def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7103,6 +7180,7 @@ def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7163,6 +7241,7 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7223,6 +7302,7 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7283,6 +7363,7 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   # --- end pseudocode ---
   result = {'d0': D0._val, 'scc': scc & 1}
   if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
+  result['vcc_lane'] = (D0._val >> lane) & 1
   result['d0_64'] = True
   _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
   result['new_pc'] = _pc # absolute byte address
@@ -7298,7 +7379,7 @@ def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f16 < S1.f16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7312,7 +7393,7 @@ def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f16 == S1.f16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7325,7 +7406,7 @@ def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f16 <= S1.f16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7338,7 +7419,7 @@ def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f16 > S1.f16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7351,7 +7432,7 @@ def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f16 != S1.f16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7364,7 +7445,7 @@ def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f16 >= S1.f16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7377,7 +7458,7 @@ def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7390,7 +7471,7 @@ def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16)))
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7404,7 +7485,7 @@ def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f16 >= S1.f16)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7418,7 +7499,7 @@ def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f16 != S1.f16)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7432,7 +7513,7 @@ def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f16 > S1.f16)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7446,7 +7527,7 @@ def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f16 <= S1.f16)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7460,7 +7541,7 @@ def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f16 == S1.f16)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7474,7 +7555,7 @@ def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f16 < S1.f16)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7487,7 +7568,7 @@ def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f32 < S1.f32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7501,7 +7582,7 @@ def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f32 == S1.f32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7514,7 +7595,7 @@ def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f32 <= S1.f32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7527,7 +7608,7 @@ def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f32 > S1.f32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7540,7 +7621,7 @@ def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f32 != S1.f32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7553,7 +7634,7 @@ def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f32 >= S1.f32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7566,7 +7647,7 @@ def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7579,7 +7660,7 @@ def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7593,7 +7674,7 @@ def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f32 >= S1.f32)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7607,7 +7688,7 @@ def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f32 != S1.f32)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7621,7 +7702,7 @@ def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f32 > S1.f32)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7635,7 +7716,7 @@ def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f32 <= S1.f32)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7649,7 +7730,7 @@ def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f32 == S1.f32)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7663,7 +7744,7 @@ def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f32 < S1.f32)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7676,7 +7757,7 @@ def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f64 < S1.f64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7690,7 +7771,7 @@ def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f64 == S1.f64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7703,7 +7784,7 @@ def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f64 <= S1.f64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7716,7 +7797,7 @@ def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f64 > S1.f64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7729,7 +7810,7 @@ def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f64 != S1.f64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7742,7 +7823,7 @@ def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.f64 >= S1.f64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7755,7 +7836,7 @@ def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64))
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7768,7 +7849,7 @@ def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP
   EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64))
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7782,7 +7863,7 @@ def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f64 >= S1.f64)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7796,7 +7877,7 @@ def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f64 != S1.f64)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7810,7 +7891,7 @@ def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f64 > S1.f64)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7824,7 +7905,7 @@ def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f64 <= S1.f64)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7838,7 +7919,7 @@ def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f64 == S1.f64)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7852,7 +7933,7 @@ def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   EXEC.u64[laneId] = not (S0.f64 < S1.f64)
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7865,7 +7946,7 @@ def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i16 < S1.i16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7879,7 +7960,7 @@ def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i16 == S1.i16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7892,7 +7973,7 @@ def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i16 <= S1.i16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7905,7 +7986,7 @@ def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i16 > S1.i16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7918,7 +7999,7 @@ def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i16 != S1.i16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7931,7 +8012,7 @@ def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i16 >= S1.i16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7944,7 +8025,7 @@ def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u16 < S1.u16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7958,7 +8039,7 @@ def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u16 == S1.u16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7971,7 +8052,7 @@ def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u16 <= S1.u16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7984,7 +8065,7 @@ def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u16 > S1.u16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -7997,7 +8078,7 @@ def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u16 != S1.u16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8010,7 +8091,7 @@ def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u16 >= S1.u16
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8023,7 +8104,7 @@ def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i32 < S1.i32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8037,7 +8118,7 @@ def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i32 == S1.i32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8050,7 +8131,7 @@ def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i32 <= S1.i32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8063,7 +8144,7 @@ def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i32 > S1.i32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8076,7 +8157,7 @@ def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i32 != S1.i32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8089,7 +8170,7 @@ def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i32 >= S1.i32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8102,7 +8183,7 @@ def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u32 < S1.u32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8116,7 +8197,7 @@ def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u32 == S1.u32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8129,7 +8210,7 @@ def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u32 <= S1.u32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8142,7 +8223,7 @@ def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u32 > S1.u32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8155,7 +8236,7 @@ def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u32 != S1.u32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8168,7 +8249,7 @@ def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u32 >= S1.u32
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8181,7 +8262,7 @@ def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i64 < S1.i64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8195,7 +8276,7 @@ def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i64 == S1.i64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8208,7 +8289,7 @@ def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i64 <= S1.i64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8221,7 +8302,7 @@ def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i64 > S1.i64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8234,7 +8315,7 @@ def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i64 != S1.i64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8247,7 +8328,7 @@ def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.i64 >= S1.i64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8260,7 +8341,7 @@ def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u64 < S1.u64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8274,7 +8355,7 @@ def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u64 == S1.u64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
@@ -8287,7 +8368,7 @@ def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG
   EXEC.u64[laneId] = S0.u64 <= S1.u64
   # --- end pseudocode ---
   result = {'d0': d0, 'scc': scc & 1}
-  if EXEC._val != exec_mask: result['exec'] = EXEC._val
+  result['exec_lane'] = (EXEC._val >> lane) & 1
   return result

 def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
_VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8300,7 +8381,7 @@ def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 > S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8313,7 +8394,7 @@ def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 != S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8326,7 +8407,7 @@ def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG EXEC.u64[laneId] = S0.u64 >= S1.u64 # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8379,7 +8460,7 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8432,7 +8513,7 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -8485,7 +8566,7 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, EXEC.u64[laneId] = result # --- end pseudocode --- result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val + result['exec_lane'] = (EXEC._val >> lane) & 1 return result def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): @@ -11985,6 +12066,37 @@ def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP result = {'d0': D0._val, 'scc': scc & 1} return result +def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): + # shift = 32'I(S1[4 : 0].u32) * 53; + # if exponent(S0.f64) > 1077 then + # shift += exponent(S0.f64) - 1077 + # endif; + # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} + # // b_1200 is the MSB of the fractional part of 2.0/PI + # // Left shift operation indicates which bits are brought + # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); + # scale = -53 - shift; + # if exponent(S0.f64) >= 1968 then + # scale += 128 + # endif; + # D0.f64 = ldexp(result, scale) + S0 = Reg(s0) + S1 = 
Reg(s1) + D0 = Reg(d0) + # --- compiled pseudocode --- + shift = (S1[4 : 0].u32) * 53 + if exponent(S0.f64) > 1077: + shift += exponent(S0.f64) - 1077 + result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff) + scale = -53 - shift + if exponent(S0.f64) >= 1968: + scale += 128 + D0.f64 = ldexp(result, scale) + # --- end pseudocode --- + result = {'d0': D0._val, 'scc': scc & 1} + result['d0_64'] = True + return result + def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): # D0.u16 = (S1.u16 << S0[3 : 0].u32) S0 = Reg(s0) @@ -12748,6 +12860,7 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MUL_LO_U32: _VOP3Op_V_MUL_LO_U32, VOP3Op.V_MUL_HI_U32: _VOP3Op_V_MUL_HI_U32, VOP3Op.V_MUL_HI_I32: _VOP3Op_V_MUL_HI_I32, + VOP3Op.V_TRIG_PREOP_F64: _VOP3Op_V_TRIG_PREOP_F64, VOP3Op.V_LSHLREV_B16: _VOP3Op_V_LSHLREV_B16, VOP3Op.V_LSHRREV_B16: _VOP3Op_V_LSHRREV_B16, VOP3Op.V_ASHRREV_I16: _VOP3Op_V_ASHRREV_I16, diff --git a/extra/assembly/amd/dsl.py b/extra/assembly/amd/dsl.py index 615597e81b..ef2f3aefdc 100644 --- a/extra/assembly/amd/dsl.py +++ b/extra/assembly/amd/dsl.py @@ -283,6 +283,10 @@ class Inst: from extra.assembly.amd.autogen.rdna3 import VOP3Op try: op_name = VOP3Op(op).name except ValueError: pass + if op_name is None and self.__class__.__name__ == 'VOPC': + from extra.assembly.amd.autogen.rdna3 import VOPCOp + try: op_name = VOPCOp(op).name + except ValueError: pass if op_name is None: return False # V_LDEXP_F64 has 32-bit integer exponent in src1, so literal is 32-bit if op_name == 'V_LDEXP_F64': return False diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index 7e9dbd014b..c99720aceb 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -17,6 +17,7 @@ VCC_LO, VCC_HI, NULL, EXEC_LO, EXEC_HI, SCC = SrcEnum.VCC_LO, SrcEnum.VCC_HI, Sr # VOP3 ops that use 64-bit operands (and thus 64-bit literals when src is 255) # Exception: V_LDEXP_F64 has 32-bit integer src1, so literal should NOT be 64-bit when src1=255 _VOP3_64BIT_OPS = {op.value for op in VOP3Op if op.name.endswith(('_F64', '_B64', '_I64', '_U64'))} +_VOPC_64BIT_OPS = {op.value for op in VOPCOp if op.name.endswith(('_F64', '_B64', '_I64', '_U64'))} # Ops where src1 is 32-bit (exponent/shift amount) even though the op name suggests 64-bit _VOP3_64BIT_OPS_32BIT_SRC1 = {VOP3Op.V_LDEXP_F64.value} # Ops with 16-bit types in name (for source/dest handling) @@ -185,7 +186,7 @@ def decode_program(data: bytes) -> Program: # Exception: some ops have mixed src sizes (e.g., V_LDEXP_F64 has 32-bit src1) op_val = inst._values.get('op') if hasattr(op_val, 'value'): op_val = op_val.value - is_64bit = inst_class is VOP3 and op_val in _VOP3_64BIT_OPS + is_64bit = (inst_class is VOP3 and op_val in _VOP3_64BIT_OPS) or (inst_class is VOPC and op_val in _VOPC_64BIT_OPS) # Don't treat literal as 64-bit if the op has 32-bit src1 and src1 is the literal if is_64bit and op_val in _VOP3_64BIT_OPS_32BIT_SRC1 and getattr(inst, 'src1', None) == 255: is_64bit = False @@ -336,14 +337,22 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No op = VOP3SDOp(inst.op) fn = compiled.get(VOP3SDOp, {}).get(op) if fn is None: raise NotImplementedError(f"{op.name} not in pseudocode") - s0, s1, s2 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane), st.rsrc(inst.src2, lane) - # For 64-bit src2 ops (V_MAD_U64_U32, V_MAD_I64_I32), read from consecutive registers + # VOP3SD has both 32-bit ops (V_ADD_CO_CI_U32, etc.) 
and 64-bit ops (V_DIV_SCALE_F64, V_MAD_U64_U32, etc.) + div_scale_64_ops = (VOP3SDOp.V_DIV_SCALE_F64,) mad64_ops = (VOP3SDOp.V_MAD_U64_U32, VOP3SDOp.V_MAD_I64_I32) - if op in mad64_ops: + if op in div_scale_64_ops: + # V_DIV_SCALE_F64: all sources are 64-bit + s0, s1, s2 = st.rsrc64(inst.src0, lane), st.rsrc64(inst.src1, lane), st.rsrc64(inst.src2, lane) + elif op in mad64_ops: + # V_MAD_U64_U32, V_MAD_I64_I32: src0/src1 are 32-bit, src2 is 64-bit + s0, s1 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane) if inst.src2 >= 256: # VGPR s2 = V[inst.src2 - 256] | (V[inst.src2 - 256 + 1] << 32) else: # SGPR - read 64-bit from consecutive SGPRs s2 = st.rsgpr64(inst.src2) + else: + # Default: 32-bit sources + s0, s1, s2 = st.rsrc(inst.src0, lane), st.rsrc(inst.src1, lane), st.rsrc(inst.src2, lane) d0 = V[inst.vdst] # For carry-in operations (V_*_CO_CI_*), src2 register contains the carry bitmask (not VCC). # The pseudocode uses VCC but in VOP3SD encoding, the actual carry source is inst.src2. @@ -516,8 +525,9 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No # For 64-bit shift ops: src0 is 32-bit (shift amount), src1 is 64-bit (value to shift) # For most other _B64/_I64/_U64/_F64 ops: all sources are 64-bit is_64bit_op = op.name.endswith(('_B64', '_I64', '_U64', '_F64')) - # V_LDEXP_F64: src0 is 64-bit float, src1 is 32-bit integer exponent - is_ldexp_64 = op in (VOP3Op.V_LDEXP_F64,) + # V_LDEXP_F64, V_TRIG_PREOP_F64, V_CMP_CLASS_F64, V_CMPX_CLASS_F64: src0 is 64-bit, src1 is 32-bit + is_ldexp_64 = op in (VOP3Op.V_LDEXP_F64, VOP3Op.V_TRIG_PREOP_F64, VOP3Op.V_CMP_CLASS_F64, VOP3Op.V_CMPX_CLASS_F64, + VOPCOp.V_CMP_CLASS_F64, VOPCOp.V_CMPX_CLASS_F64) is_shift_64 = op in (VOP3Op.V_LSHLREV_B64, VOP3Op.V_LSHRREV_B64, VOP3Op.V_ASHRREV_I64) # 16-bit source ops: use precomputed sets instead of string checks # Note: must check op_cls to avoid cross-enum value collisions @@ -531,7 +541,12 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 elif is_ldexp_64: s0 = mod_src64(st.rsrc64(src0, lane), 0) # mantissa is 64-bit float - s1 = mod_src(st.rsrc(src1, lane), 1) if src1 is not None else 0 # exponent is 32-bit int + # src1 is 32-bit int. For 64-bit ops (like V_CMP_CLASS_F64), the literal is stored shifted left by 32. + # For V_LDEXP_F64/V_TRIG_PREOP_F64, _is_64bit_op() returns False so literal is stored as-is. 
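# (Illustration, not part of the diff: with a hypothetical literal class mask of 0x1f8 on
# V_CMP_CLASS_F64, the 64-bit literal path stores 0x1f8 << 32, so the (s1_raw >> 32) below
# recovers 0x1f8; a V_LDEXP_F64 or V_TRIG_PREOP_F64 exponent literal is stored unshifted
# and is used as-is.)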
+ s1_raw = st.rsrc(src1, lane) if src1 is not None else 0 + # Only shift if src1 is literal AND this is a true 64-bit op (V_CMP_CLASS ops, not LDEXP/TRIG_PREOP) + is_class_op = op in (VOP3Op.V_CMP_CLASS_F64, VOP3Op.V_CMPX_CLASS_F64, VOPCOp.V_CMP_CLASS_F64, VOPCOp.V_CMPX_CLASS_F64) + s1 = mod_src((s1_raw >> 32) if src1 == 255 and is_class_op else s1_raw, 1) s2 = mod_src(st.rsrc(src2, lane), 2) if src2 is not None else 0 elif is_64bit_op: # 64-bit ops: apply neg/abs modifiers using f64 interpretation for float ops @@ -651,7 +666,7 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No is_16bit_dst = (op_cls is VOP3Op and op in _VOP3_16BIT_DST_OPS) or (op_cls is VOP1Op and op in _VOP1_16BIT_DST_OPS) if writes_to_sgpr: st.wsgpr(vdst, result['d0'] & 0xffffffff) - elif result.get('d0_64') or is_64bit_op: + elif result.get('d0_64'): V[vdst] = result['d0'] & 0xffffffff V[vdst + 1] = (result['d0'] >> 32) & 0xffffffff elif is_16bit_dst and inst_type is VOP3: diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py index c230384324..6cef8ff2e8 100644 --- a/extra/assembly/amd/pcode.py +++ b/extra/assembly/amd/pcode.py @@ -280,7 +280,7 @@ def f32_to_u8(f): return max(0, min(255, int(f))) if not math.isnan(f) else 0 def mantissa(f): if f == 0.0 or math.isinf(f) or math.isnan(f): return f m, _ = math.frexp(f) - return math.copysign(m * 2.0, f) + return m # AMD V_FREXP_MANT returns mantissa in [0.5, 1.0) range def signext_from_bit(val, bit): bit = int(bit) if bit == 0: return 0 @@ -301,6 +301,7 @@ __all__ = [ # Constants 'WAVE32', 'WAVE64', 'MASK32', 'MASK64', 'WAVE_MODE', 'DENORM', 'OVERFLOW_F32', 'UNDERFLOW_F32', 'OVERFLOW_F64', 'UNDERFLOW_F64', 'MAX_FLOAT_F32', 'ROUND_MODE', 'cvtToQuietNAN', 'DST', 'INF', 'PI', + 'TWO_OVER_PI_1201', # Aliases for pseudocode 's_ff1_i32_b32', 's_ff1_i32_b64', 'GT_NEG_ZERO', 'LT_NEG_ZERO', 'isNAN', 'isQuietNAN', 'isSignalNAN', 'fma', 'ldexp', 'sign', 'exponent', 'F', 'signext', @@ -359,12 +360,14 @@ class _Inf: f16 = f32 = f64 = float('inf') def __neg__(self): return _NegInf() def __pos__(self): return self + def __float__(self): return float('inf') def __eq__(self, other): return float(other) == float('inf') if not isinstance(other, _NegInf) else False def __req__(self, other): return self.__eq__(other) class _NegInf: f16 = f32 = f64 = float('-inf') def __neg__(self): return _Inf() def __pos__(self): return self + def __float__(self): return float('-inf') def __eq__(self, other): return float(other) == float('-inf') if not isinstance(other, _Inf) else False def __req__(self, other): return self.__eq__(other) INF = _Inf() @@ -380,6 +383,31 @@ DST = None # Placeholder, will be set in context MASK32, MASK64 = 0xffffffff, 0xffffffffffffffff +# 2/PI with 1201 bits of precision for V_TRIG_PREOP_F64 +# Computed as: int((2/pi) * 2^1201) - this is the fractional part of 2/pi scaled to integer +# The MSB (bit 1200) corresponds to 2^0 position in the fraction 0.b1200 b1199 ... 
b1 b0 +_TWO_OVER_PI_1201_RAW = 0x0145f306dc9c882a53f84eafa3ea69bb81b6c52b3278872083fca2c757bd778ac36e48dc74849ba5c00c925dd413a32439fc3bd63962534e7dd1046bea5d768909d338e04d68befc827323ac7306a673e93908bf177bf250763ff12fffbc0b301fde5e2316b414da3eda6cfd9e4f96136e9e8c7ecd3cbfd45aea4f758fd7cbe2f67a0e73ef14a525d4d7f6bf623f1aba10ac06608df8f6 + +class _BigInt: + """Wrapper for large integers that supports bit slicing [high:low].""" + __slots__ = ('_val',) + def __init__(self, val): self._val = val + def __getitem__(self, key): + if isinstance(key, slice): + high, low = key.start, key.stop + if high < low: high, low = low, high # Handle reversed slice + mask = (1 << (high - low + 1)) - 1 + return (self._val >> low) & mask + return (self._val >> key) & 1 + def __int__(self): return self._val + def __index__(self): return self._val + def __lshift__(self, n): return self._val << int(n) + def __rshift__(self, n): return self._val >> int(n) + def __and__(self, n): return self._val & int(n) + def __or__(self, n): return self._val | int(n) + +TWO_OVER_PI_1201 = _BigInt(_TWO_OVER_PI_1201_RAW) + class _WaveMode: IEEE = False WAVE_MODE = _WaveMode() @@ -693,6 +721,9 @@ def _expr(e: str) -> str: return f'_pack({hi}, {lo})' e = re.sub(r'\{\s*([^,{}]+)\s*,\s*([^,{}]+)\s*\}', pack, e) + # Special constant: 1201'B(2.0 / PI) -> TWO_OVER_PI_1201 (precomputed 1201-bit 2/pi) + e = re.sub(r"1201'B\(2\.0\s*/\s*PI\)", "TWO_OVER_PI_1201", e) + # Literals: 1'0U -> 0, 32'I(x) -> (x), B(x) -> (x) e = re.sub(r"\d+'([0-9a-fA-Fx]+)[UuFf]*", r'\1', e) e = re.sub(r"\d+'[FIBU]\(", "(", e) @@ -815,7 +846,7 @@ INST_PATTERN = re.compile(r'^([SV]_[A-Z0-9_]+)\s+(\d+)\s*$', re.M) UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS', 'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt', 'CVT_OFF_TABLE', 'ThreadMask', - 'S1[i', 'C.i32', 'S[i]', 'in[', '2.0 / PI', + 'S1[i', 'C.i32', 'S[i]', 'in[', 'if n.', 'DST.u32', 'addrd = DST', 'addr = DST'] # Malformed pseudocode from PDF def extract_pseudocode(text: str) -> str | None: @@ -1050,12 +1081,22 @@ from extra.assembly.amd.pcode import * code = code.replace( 'D0.f64 = ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))', 'D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))') + # V_TRIG_PREOP_F64: AMD pseudocode uses (x << shift) & mask but mask needs to extract TOP bits. + # The PDF shows: result = 64'F((1201'B(2.0/PI)[1200:0] << shift) & 1201'0x1fffffffffffff) + # Issues to fix: + # 1. After left shift, the interesting bits are at the top, not bottom - need >> (1201-53) + # 2. shift.u32 fails because shift is a plain int after * 53 - use int(shift) + # 3. 64'F(...) 
means convert int to float (not interpret as bit pattern) - use float() + if op.name == 'V_TRIG_PREOP_F64': + code = code.replace( + 'result = F((TWO_OVER_PI_1201[1200 : 0] << shift.u32) & 0x1fffffffffffff)', + 'result = float(((TWO_OVER_PI_1201[1200 : 0] << int(shift)) >> (1201 - 53)) & 0x1fffffffffffff)') # Detect flags for result handling is_64 = any(p in pc for p in ['D0.u64', 'D0.b64', 'D0.f64', 'D0.i64', 'D1.u64', 'D1.b64', 'D1.f64', 'D1.i64']) has_d1 = '{ D1' in pc if has_d1: is_64 = True - is_cmp = cls_name == 'VOPCOp' and 'D0.u64[laneId]' in pc - is_cmpx = cls_name == 'VOPCOp' and 'EXEC.u64[laneId]' in pc # V_CMPX writes to EXEC per-lane + is_cmp = (cls_name == 'VOPCOp' or cls_name == 'VOP3Op') and 'D0.u64[laneId]' in pc + is_cmpx = (cls_name == 'VOPCOp' or cls_name == 'VOP3Op') and 'EXEC.u64[laneId]' in pc # V_CMPX writes to EXEC per-lane # V_DIV_SCALE passes through S0 if no branch taken is_div_scale = 'DIV_SCALE' in op.name # VOP3SD instructions that write VCC per-lane (either via VCC.u64[laneId] or by setting VCC = 0/1) diff --git a/extra/assembly/amd/test/test_emu.py b/extra/assembly/amd/test/test_emu.py index e9055070ad..3761ddf356 100644 --- a/extra/assembly/amd/test/test_emu.py +++ b/extra/assembly/amd/test/test_emu.py @@ -2454,6 +2454,82 @@ class TestF64Conversions(unittest.TestCase): result = struct.unpack('> 32), + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], 0xDEADBEEF), # Canary value + v_mov_b32_e32(v[3], s[2]), # Put canary in v3 + v_cvt_i32_f64_e32(v[2], v[0:2]), # Convert -1.0 -> -1 (0xffffffff) + ] + st = run_program(instructions, n_lanes=1) + result = st.vgpr[0][2] + canary = st.vgpr[0][3] + # V_CVT_I32_F64 of -1.0 should produce 0xffffffff (-1) + self.assertEqual(result, 0xffffffff, f"Expected 0xffffffff (-1), got 0x{result:08x}") + # v3 should still contain the canary (not clobbered by 64-bit write) + self.assertEqual(canary, 0xDEADBEEF, f"v3 canary should be 0xDEADBEEF, got 0x{canary:08x} (clobbered!)") + + def test_v_frexp_mant_f64_range(self): + """V_FREXP_MANT_F64 should return mantissa in [0.5, 1.0) range. + + Regression test: The mantissa() helper was incorrectly multiplying by 2.0, + returning values in [1.0, 2.0) instead of the correct [0.5, 1.0) range. + """ + # Test with 2.0: frexp(2.0) should give mantissa=0.5, exponent=2 + two_f64 = f2i64(2.0) + instructions = [ + s_mov_b32(s[0], two_f64 & 0xffffffff), + s_mov_b32(s[1], two_f64 >> 32), + v_frexp_mant_f64_e32(v[0:2], s[0:2]), + v_frexp_exp_i32_f64_e32(v[2], s[0:2]), + ] + st = run_program(instructions, n_lanes=1) + mant = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + exp = st.vgpr[0][2] + if exp >= 0x80000000: exp -= 0x100000000 # sign extend + # frexp(2.0) = 0.5 * 2^2 + self.assertAlmostEqual(mant, 0.5, places=10, msg=f"Expected mantissa 0.5, got {mant}") + self.assertEqual(exp, 2, f"Expected exponent 2, got {exp}") + + def test_v_div_scale_f64_reads_64bit_sources(self): + """V_DIV_SCALE_F64 must read all sources as 64-bit values. + + Regression test: VOP3SD was reading sources as 32-bit for V_DIV_SCALE_F64, + causing incorrect results when the low 32 bits happened to look like 0 or denorm. 
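    (For reference: f2i64(1.4142135623730951) == 0x3ff6a09e667f3bcd. A 32-bit read keeps
    only the low word 0x667f3bcd; reinterpreted as an f64 with a zero high word, that is a
    denormal around 8.5e-315, which is exactly the "0 or denorm" case described above.)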
+ """ + # Set up v0:v1 = sqrt(2) ≈ 1.414, v2:v3 = 1.0 + sqrt2_f64 = f2i64(1.4142135623730951) + one_f64 = f2i64(1.0) + instructions = [ + s_mov_b32(s[0], sqrt2_f64 & 0xffffffff), + s_mov_b32(s[1], sqrt2_f64 >> 32), + v_mov_b32_e32(v[0], s[0]), + v_mov_b32_e32(v[1], s[1]), + s_mov_b32(s[2], one_f64 & 0xffffffff), + s_mov_b32(s[3], one_f64 >> 32), + v_mov_b32_e32(v[2], s[2]), + v_mov_b32_e32(v[3], s[3]), + # V_DIV_SCALE_F64: src0=v0:v1, src1=v0:v1, src2=v2:v3 + # For normal inputs, should pass through src0 unchanged + VOP3SD(VOP3SDOp.V_DIV_SCALE_F64, vdst=v[4], sdst=s[10], src0=v[0], src1=v[0], src2=v[2]), + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][4] | (st.vgpr[0][5] << 32)) + # For normal (non-denorm, non-edge-case) inputs, V_DIV_SCALE_F64 passes through src0 + self.assertAlmostEqual(result, 1.4142135623730951, places=10, + msg=f"Expected ~1.414, got {result} (may be nan if 64-bit sources not read correctly)") + class TestNewPcodeHelpers(unittest.TestCase): """Tests for newly added pcode helper functions (SAD, BYTE_PERMUTE, BF16).""" @@ -3650,3 +3726,90 @@ class TestVFmaMixSinCase(unittest.TestCase): # Result should be approximately -π = -3.14... # f16 -π ≈ 0xc248 = -3.140625 self.assertAlmostEqual(lo, -3.14159, delta=0.01, msg=f"Expected ~-π, got {lo}") + + +class TestVTrigPreopF64(unittest.TestCase): + """Tests for V_TRIG_PREOP_F64 instruction. + + V_TRIG_PREOP_F64 extracts chunks of 2/PI for Payne-Hanek trig range reduction. + For input S0 (f64) and index S1 (0, 1, or 2), it returns a portion of 2/PI + scaled appropriately for computing |S0| * (2/PI) in extended precision. + + The three chunks (index 0, 1, 2) when summed should equal 2/PI. + """ + + def test_trig_preop_f64_index0(self): + """V_TRIG_PREOP_F64 index=0: primary chunk of 2/PI.""" + import math + two_over_pi = 2.0 / math.pi + instructions = [ + # S0 = 1.0 (f64), S1 = 0 (index) + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 0), # index 0 + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # For x=1.0, index=0 should give the main part of 2/PI + self.assertAlmostEqual(result, two_over_pi, places=10, msg=f"Expected ~{two_over_pi}, got {result}") + + def test_trig_preop_f64_index1(self): + """V_TRIG_PREOP_F64 index=1: secondary chunk (extended precision bits).""" + instructions = [ + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 1), # index 1 + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # Index 1 gives the next 53 bits, should be very small (~1e-16) + self.assertLess(abs(result), 1e-15, msg=f"Expected tiny value, got {result}") + self.assertGreater(abs(result), 0, msg="Expected non-zero value") + + def test_trig_preop_f64_index2(self): + """V_TRIG_PREOP_F64 index=2: tertiary chunk (more extended precision bits).""" + instructions = [ + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 2), # index 2 + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # Index 2 gives the next 53 bits after index 1, should be tiny (~1e-32) + self.assertLess(abs(result), 1e-30, msg=f"Expected very tiny value, got {result}") + + def test_trig_preop_f64_sum_equals_two_over_pi(self): + 
"""V_TRIG_PREOP_F64: sum of chunks 0,1,2 should equal 2/PI.""" + import math + two_over_pi = 2.0 / math.pi + instructions = [ + s_mov_b32(s[0], 0x00000000), # low bits of 1.0 + s_mov_b32(s[1], 0x3ff00000), # high bits of 1.0 + v_trig_preop_f64(v[0], abs(s[0]), 0), # index 0 -> v[0:1] + v_trig_preop_f64(v[2], abs(s[0]), 1), # index 1 -> v[2:3] + v_trig_preop_f64(v[4], abs(s[0]), 2), # index 2 -> v[4:5] + ] + st = run_program(instructions, n_lanes=1) + p0 = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + p1 = i642f(st.vgpr[0][2] | (st.vgpr[0][3] << 32)) + p2 = i642f(st.vgpr[0][4] | (st.vgpr[0][5] << 32)) + total = p0 + p1 + p2 + self.assertAlmostEqual(total, two_over_pi, places=14, msg=f"Expected {two_over_pi}, got {total} (p0={p0}, p1={p1}, p2={p2})") + + def test_trig_preop_f64_large_input(self): + """V_TRIG_PREOP_F64 with larger input should adjust shift based on exponent.""" + import math + # For x=2.0, exponent(2.0)=1024 which is <= 1077, so no adjustment + # But let's test with x=2^60 where exponent > 1077 + large_val = 2.0 ** 60 # exponent = 1083 > 1077 + large_bits = f2i64(large_val) + instructions = [ + s_mov_b32(s[0], large_bits & 0xffffffff), + s_mov_b32(s[1], (large_bits >> 32) & 0xffffffff), + v_trig_preop_f64(v[0], abs(s[0]), 0), + ] + st = run_program(instructions, n_lanes=1) + result = i642f(st.vgpr[0][0] | (st.vgpr[0][1] << 32)) + # Result should still be a valid float (not NaN or inf) + self.assertFalse(math.isnan(result), "Result should not be NaN") + self.assertFalse(math.isinf(result), "Result should not be inf") From 7e14cdcb06267193c7a345bedc454691c13ae060 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Tue, 30 Dec 2025 11:59:28 -0500 Subject: [PATCH 5/8] assembly/amd: clean up clt/ctz hack (#13901) * assembly/amd: clean up clt/ctz hack * add breaks --- extra/assembly/amd/autogen/cdna/gen_pcode.py | 28 +++++++++---------- extra/assembly/amd/autogen/rdna3/gen_pcode.py | 8 +++--- extra/assembly/amd/autogen/rdna4/gen_pcode.py | 8 +++--- extra/assembly/amd/pcode.py | 18 ++++++------ extra/assembly/amd/test/test_pcode.py | 2 ++ 5 files changed, 33 insertions(+), 31 deletions(-) diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py index b5cd12abc0..e95ab11713 100644 --- a/extra/assembly/amd/autogen/cdna/gen_pcode.py +++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py @@ -251,7 +251,7 @@ def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 0: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -274,7 +274,7 @@ def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 0: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -297,7 +297,7 @@ def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 1: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -320,7 +320,7 @@ def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 1: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -343,7 +343,7 @@ 
def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -366,7 +366,7 @@ def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[63 - i] == 1: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -389,7 +389,7 @@ def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -412,7 +412,7 @@ def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -2964,7 +2964,7 @@ def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result @@ -2984,7 +2984,7 @@ def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result @@ -3004,7 +3004,7 @@ def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result @@ -14702,7 +14702,7 @@ def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result @@ -14722,7 +14722,7 @@ def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result @@ -14742,7 +14742,7 @@ def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py index e480cbb93e..698ca243eb 100644 --- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py @@ -185,7 +185,7 @@ def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -208,7 +208,7 @@ def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != 
S0.u64[63]: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -4190,7 +4190,7 @@ def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result @@ -9472,7 +9472,7 @@ def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py index 70dd62eca1..4f8633e01f 100644 --- a/extra/assembly/amd/autogen/rdna4/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py @@ -185,7 +185,7 @@ def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -208,7 +208,7 @@ def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: - tmp = Reg(i) + tmp = Reg(i); break D0.i32 = tmp # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} @@ -4184,7 +4184,7 @@ def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result @@ -9128,7 +9128,7 @@ def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: - D0.i32 = i + D0.i32 = i; break # --- end pseudocode --- result = {'d0': D0._val, 'scc': scc & 1} return result diff --git a/extra/assembly/amd/pcode.py b/extra/assembly/amd/pcode.py index 6cef8ff2e8..bac30bd6ad 100644 --- a/extra/assembly/amd/pcode.py +++ b/extra/assembly/amd/pcode.py @@ -642,7 +642,7 @@ def compile_pseudocode(pseudocode: str) -> str: joined_lines.append(line) lines = [] - indent, need_pass = 0, False + indent, need_pass, in_first_match_loop = 0, False, False for line in joined_lines: line = line.strip() if not line or line.startswith('//'): continue @@ -671,14 +671,14 @@ def compile_pseudocode(pseudocode: str) -> str: elif line.startswith('endfor'): if need_pass: lines.append(' ' * indent + "pass") indent -= 1 - need_pass = False + need_pass, in_first_match_loop = False, False elif line.startswith('declare '): pass elif m := re.match(r'for (\w+) in (.+?)\s*:\s*(.+?) 
do', line): start, end = _expr(m[2].strip()), _expr(m[3].strip()) lines.append(' ' * indent + f"for {m[1]} in range({start}, int({end})+1):") indent += 1 - need_pass = True + need_pass, in_first_match_loop = True, True elif '=' in line and not line.startswith('=='): need_pass = False line = line.rstrip(';') @@ -697,7 +697,12 @@ def compile_pseudocode(pseudocode: str) -> str: break else: lhs, rhs = line.split('=', 1) - lines.append(' ' * indent + _assign(lhs.strip(), _expr(rhs.strip()))) + lhs_s, rhs_s = lhs.strip(), rhs.strip() + stmt = _assign(lhs_s, _expr(rhs_s)) + # CLZ/CTZ pattern: assignment of loop var to tmp/D0.i32 in first-match loop needs break + if in_first_match_loop and rhs_s == 'i' and (lhs_s == 'tmp' or lhs_s == 'D0.i32'): + stmt += "; break" + lines.append(' ' * indent + stmt) # If we ended with a control statement that needs a body, add pass if need_pass: lines.append(' ' * indent + "pass") return '\n'.join(lines) @@ -1014,11 +1019,6 @@ from extra.assembly.amd.pcode import * code = compile_pseudocode(pc) # NOTE: Do NOT add more code.replace() hacks here. Fix issues properly in the DSL # (compile_pseudocode, helper functions, or Reg/TypedView classes) instead. - # CLZ/CTZ: The PDF pseudocode searches for the first 1 bit but doesn't break. - # Hardware stops at first match. SOP1 uses tmp=i, VOP1/VOP3 use D0.i32=i - if 'CLZ' in op.name or 'CTZ' in op.name: - code = code.replace('tmp = Reg(i)', 'tmp = Reg(i); break') - code = code.replace('D0.i32 = i', 'D0.i32 = i; break') # V_DIV_FMAS_F32/F64: PDF page 449 says 2^32/2^64 but hardware behavior is more complex. # The scale direction depends on S2 (the addend): if exponent(S2) > 127 (i.e., S2 >= 2.0), # scale by 2^+64 (to unscale a numerator that was scaled). Otherwise scale by 2^-64 diff --git a/extra/assembly/amd/test/test_pcode.py b/extra/assembly/amd/test/test_pcode.py index 141b938baa..b848a48fe4 100644 --- a/extra/assembly/amd/test/test_pcode.py +++ b/extra/assembly/amd/test/test_pcode.py @@ -208,6 +208,8 @@ D0.u32 = tmp.u32""") for i in 0 : 31 do if S0.u32[i] == 1 then tmp = i + endif +endfor D0.i32 = tmp""") ctx = ExecContext(s0=0b1000) # Bit 3 is set ctx.run(code) From 39f99b207a632e0ce7e889457d57706dfd36285c Mon Sep 17 00:00:00 2001 From: chenyu Date: Tue, 30 Dec 2025 12:25:55 -0500 Subject: [PATCH 6/8] update IGNORE_OOB error message (#13904) IGNORE_OOB=1 to disable --- tinygrad/uop/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinygrad/uop/validate.py b/tinygrad/uop/validate.py index af5b6af29b..ceedb5924f 100644 --- a/tinygrad/uop/validate.py +++ b/tinygrad/uop/validate.py @@ -70,7 +70,7 @@ def validate_index(buf:UOp, idx:UOp, gate:UOp|None=None): # WEBGPU has a BITCAST in the index. 
TODO: fix
   if any(x.op is Ops.BITCAST for x in idx.toposort()): return True
-  if not z3_imported: raise ImportError("z3 >= 4.12.4 is required for bounds checking, try IGNORE_OOB=0 or \"pip install 'z3-solver>=4.12.4'\"")
+  if not z3_imported: raise ImportError("bounds checking requires z3 >= 4.12.4, use IGNORE_OOB=1 to disable, or \"pip install 'z3-solver>=4.12.4'\"")
   solver = z3.Solver(ctx=z3.Context())
   z3_idx, z3_mask = uops_to_z3(solver, idx, gate)
   solver.add(z3_mask)

From 04c79505eccd281348ccdf52d7a61158e6074d10 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 13:02:53 -0500
Subject: [PATCH 7/8] no subnormal bf16 (#13905)

---
 test/test_dtype_alu.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/test_dtype_alu.py b/test/test_dtype_alu.py
index 102ae908b9..8de58d346a 100644
--- a/test/test_dtype_alu.py
+++ b/test/test_dtype_alu.py
@@ -1,7 +1,7 @@
 import unittest, operator, math
 from tinygrad import Tensor, dtypes, Device
 from tinygrad.dtype import DType, truncate
-from tinygrad.helpers import CI, getenv, CPU_LLVM
+from tinygrad.helpers import CI, getenv
 from tinygrad.tensor import _to_np_dtype
 from tinygrad.device import is_dtype_supported
 from tinygrad.runtime.ops_python import from_storage_scalar
@@ -48,7 +48,7 @@ class ht:
   int32 = strat.integers(-2147483648, 2147483647)
   int64 = strat.integers(-9223372036854775808, 9223372036854775807)
   bool = strat.booleans()
-ht.bfloat16 = ht.uint16
+ht.bfloat16 = ht.uint16.filter(lambda x: ((x >> 7) & 0xFF) != 0) # filter subnormal bfloat16
 ht.fp8e4m3 = ht.uint8
 ht.fp8e5m2 = ht.uint8
@@ -138,7 +138,6 @@ class TestDTypeALU(unittest.TestCase):
   def test_float16_unary(self, a, op): universal_test_unary(a, dtypes.float16, op)
   @unittest.skipUnless(is_dtype_supported(dtypes.bfloat16), f"no bfloat16 on {Device.DEFAULT}")
-  @unittest.skipIf(CPU_LLVM, "bfloat16 precision issues with CPU_LLVM")
   @given(ht.bfloat16, strat.sampled_from(unary_operations))
   def test_bfloat16_unary(self, a, op): universal_test_unary(from_storage_scalar(a, dtypes.bfloat16), dtypes.bfloat16, op)

From 49d1bf93d636f032316f7d6f2203f0ee38374080 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Tue, 30 Dec 2025 13:51:40 -0500
Subject: [PATCH 8/8] assembly/amd: refactor asm.py to be simpler (#13900)

* assembly/amd: refactor asm.py
* assembly/amd: refactor asm.py to be simpler
* multiple fxns
* fast
* more tests pass
* regen
* stop decode
---
 extra/assembly/amd/asm.py                    | 1310 ++++++++----------
 extra/assembly/amd/autogen/cdna/__init__.py  |   54 +-
 extra/assembly/amd/autogen/rdna3/__init__.py |   64 +-
 extra/assembly/amd/dsl.py                    |   66 +-
 extra/assembly/amd/emu.py                    |   23 +-
 extra/assembly/amd/test/test_llvm.py         |   14 +-
 extra/assembly/amd/test/test_roundtrip.py    |   44 +-
 7 files changed, 739 insertions(+), 836 deletions(-)

diff --git a/extra/assembly/amd/asm.py b/extra/assembly/amd/asm.py
index 3496795dc9..6b88eb336f 100644
--- a/extra/assembly/amd/asm.py
+++ b/extra/assembly/amd/asm.py
@@ -3,21 +3,65 @@
 from __future__ import annotations
 import re
 from extra.assembly.amd.dsl import Inst, RawImm, Reg, SrcMod, SGPR, VGPR, TTMP, s, v, ttmp, _RegFactory, FLOAT_ENC, SRC_FIELDS, unwrap
 from extra.assembly.amd.dsl import VCC_LO, VCC_HI, VCC, EXEC_LO, EXEC_HI, EXEC, SCC, M0, NULL, OFF
+from extra.assembly.amd.autogen.rdna3 import VOP1, VOP2, VOP3, VOP3SD, VOP3P, VOPC, VOPD, VINTERP, SOP1, SOP2, SOPC, SOPK, SOPP, SMEM, DS, FLAT, MUBUF, MTBUF, MIMG, EXP
+from extra.assembly.amd.autogen.rdna3 
import VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp, VOPDOp, VINTERPOp +from extra.assembly.amd.autogen.rdna3 import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, SMEMOp, DSOp, FLATOp, MUBUFOp, MTBUFOp, MIMGOp + +# VOP3SD opcodes that share VOP3 encoding +VOP3SD_OPS = {288, 289, 290, 764, 765, 766, 767, 768, 769, 770} + +def detect_format(data: bytes) -> type[Inst]: + """Detect instruction format from machine code bytes.""" + assert len(data) >= 4, f"need at least 4 bytes, got {len(data)}" + word = int.from_bytes(data[:4], 'little') + hi2 = (word >> 30) & 0x3 + if hi2 == 0b11: + enc = (word >> 26) & 0xf + if enc == 0b0010: return VOPD + if enc == 0b0011: return VOP3P + if enc == 0b0100: return VINTERP + if enc == 0b0101: return VOP3SD if ((word >> 16) & 0x3ff) in VOP3SD_OPS else VOP3 + if enc == 0b0110: return DS + if enc == 0b0111: return FLAT + if enc == 0b1000: return MUBUF + if enc == 0b1010: return MTBUF + if enc == 0b1100 or enc == 0b1111: return MIMG + if enc == 0b1101: return SMEM + if enc == 0b1110: return EXP + raise ValueError(f"unknown 64-bit format enc={enc:#06b} word={word:#010x}") + if hi2 == 0b10: + enc = (word >> 23) & 0x7f + if enc == 0b1111101: return SOP1 + if enc == 0b1111110: return SOPC + if enc == 0b1111111: return SOPP + return SOPK if ((word >> 28) & 0xf) == 0b1011 else SOP2 + # hi2 == 0b00 or 0b01: VOP1/VOP2/VOPC (bit 31 = 0) + assert (word >> 31) == 0, f"expected bit 31 = 0 for VOP, got word={word:#010x}" + enc = (word >> 25) & 0x7f + if enc == 0b0111110: return VOPC + if enc == 0b0111111: return VOP1 + if enc <= 0b0111101: return VOP2 + raise ValueError(f"unknown VOP format enc={enc:#09b} word={word:#010x}") + +# ═══════════════════════════════════════════════════════════════════════════════ +# CONSTANTS +# ═══════════════════════════════════════════════════════════════════════════════ -# Decoding helpers SPECIAL_GPRS = {106: "vcc_lo", 107: "vcc_hi", 124: "null", 125: "m0", 126: "exec_lo", 127: "exec_hi", 253: "scc"} SPECIAL_DEC = {**SPECIAL_GPRS, **{v: str(k) for k, v in FLOAT_ENC.items()}} -SPECIAL_PAIRS = {106: "vcc", 126: "exec"} # Special register pairs (for 64-bit ops) -# GFX11 hwreg names (IDs 16-17 are TBA - not supported, IDs 18-19 are PERF_SNAPSHOT) -HWREG_NAMES = {1: 'HW_REG_MODE', 2: 'HW_REG_STATUS', 3: 'HW_REG_TRAPSTS', 4: 'HW_REG_HW_ID', 5: 'HW_REG_GPR_ALLOC', - 6: 'HW_REG_LDS_ALLOC', 7: 'HW_REG_IB_STS', 15: 'HW_REG_SH_MEM_BASES', 18: 'HW_REG_PERF_SNAPSHOT_PC_LO', - 19: 'HW_REG_PERF_SNAPSHOT_PC_HI', 20: 'HW_REG_FLAT_SCR_LO', 21: 'HW_REG_FLAT_SCR_HI', - 22: 'HW_REG_XNACK_MASK', 23: 'HW_REG_HW_ID1', 24: 'HW_REG_HW_ID2', 25: 'HW_REG_POPS_PACKER', 28: 'HW_REG_IB_STS2'} -HWREG_IDS = {v.lower(): k for k, v in HWREG_NAMES.items()} # Reverse map for assembler -MSG_NAMES = {128: 'MSG_RTN_GET_DOORBELL', 129: 'MSG_RTN_GET_DDID', 130: 'MSG_RTN_GET_TMA', - 131: 'MSG_RTN_GET_REALTIME', 132: 'MSG_RTN_SAVE_WAVE', 133: 'MSG_RTN_GET_TBA'} -_16BIT_TYPES = ('f16', 'i16', 'u16', 'b16') -def _is_16bit(s: str) -> bool: return any(s.endswith(x) for x in _16BIT_TYPES) +SPECIAL_PAIRS = {106: "vcc", 126: "exec"} +HWREG = {1: 'HW_REG_MODE', 2: 'HW_REG_STATUS', 3: 'HW_REG_TRAPSTS', 4: 'HW_REG_HW_ID', 5: 'HW_REG_GPR_ALLOC', + 6: 'HW_REG_LDS_ALLOC', 7: 'HW_REG_IB_STS', 15: 'HW_REG_SH_MEM_BASES', 18: 'HW_REG_PERF_SNAPSHOT_PC_LO', + 19: 'HW_REG_PERF_SNAPSHOT_PC_HI', 20: 'HW_REG_FLAT_SCR_LO', 21: 'HW_REG_FLAT_SCR_HI', 22: 'HW_REG_XNACK_MASK', + 23: 'HW_REG_HW_ID1', 24: 'HW_REG_HW_ID2', 25: 'HW_REG_POPS_PACKER', 28: 'HW_REG_IB_STS2'} +HWREG_IDS = {v.lower(): k for k, v in 
HWREG.items()} +MSG = {128: 'MSG_RTN_GET_DOORBELL', 129: 'MSG_RTN_GET_DDID', 130: 'MSG_RTN_GET_TMA', + 131: 'MSG_RTN_GET_REALTIME', 132: 'MSG_RTN_SAVE_WAVE', 133: 'MSG_RTN_GET_TBA'} +VOP3SD_OPS = {288, 289, 290, 764, 765, 766, 767, 768, 769, 770} + +# ═══════════════════════════════════════════════════════════════════════════════ +# HELPERS +# ═══════════════════════════════════════════════════════════════════════════════ def decode_src(val: int) -> str: if val <= 105: return f"s{val}" @@ -28,744 +72,596 @@ def decode_src(val: int) -> str: if 256 <= val <= 511: return f"v{val - 256}" return "lit" if val == 255 else f"?{val}" -def _reg(prefix: str, base: int, cnt: int = 1) -> str: return f"{prefix}{base}" if cnt == 1 else f"{prefix}[{base}:{base+cnt-1}]" -def _sreg(base: int, cnt: int = 1) -> str: return _reg("s", base, cnt) -def _vreg(base: int, cnt: int = 1) -> str: return _reg("v", base, cnt) +def _reg(p: str, b: int, n: int = 1) -> str: return f"{p}{b}" if n == 1 else f"{p}[{b}:{b+n-1}]" +def _sreg(b: int, n: int = 1) -> str: return _reg("s", b, n) +def _vreg(b: int, n: int = 1) -> str: return _reg("v", b, n) +def _hl(v: int, hi_thresh: int = 128) -> str: return 'h' if v >= hi_thresh else 'l' -def _fmt_sdst(v: int, cnt: int = 1) -> str: - """Format SGPR destination with special register names.""" +def _fmt_sdst(v: int, n: int = 1) -> str: if v == 124: return "null" - if 108 <= v <= 123: return _reg("ttmp", v - 108, cnt) - if cnt > 1 and v in SPECIAL_PAIRS: return SPECIAL_PAIRS[v] - if cnt > 1: return _sreg(v, cnt) + if 108 <= v <= 123: return _reg("ttmp", v - 108, n) + if n > 1: return SPECIAL_PAIRS.get(v) or _sreg(v, n) return {126: "exec_lo", 127: "exec_hi", 106: "vcc_lo", 107: "vcc_hi", 125: "m0"}.get(v, f"s{v}") -def _fmt_ssrc(v: int, cnt: int = 1) -> str: - """Format SGPR source with special register names and pairs.""" - if cnt == 2: - if v in SPECIAL_PAIRS: return SPECIAL_PAIRS[v] - if v <= 105: return _sreg(v, 2) - if 108 <= v <= 123: return _reg("ttmp", v - 108, 2) +def _fmt_src(v: int, n: int = 1) -> str: + if n == 1: return decode_src(v) + if v >= 256: return _vreg(v - 256, n) + if v <= 105: return _sreg(v, n) + if n == 2 and v in SPECIAL_PAIRS: return SPECIAL_PAIRS[v] + if 108 <= v <= 123: return _reg("ttmp", v - 108, n) return decode_src(v) -def _fmt_src_n(v: int, cnt: int) -> str: - """Format source with given register count (1, 2, or 4).""" - if cnt == 1: return decode_src(v) - if v >= 256: return _vreg(v - 256, cnt) - if v <= 105: return _sreg(v, cnt) - if cnt == 2 and v in SPECIAL_PAIRS: return SPECIAL_PAIRS[v] - if 108 <= v <= 123: return _reg("ttmp", v - 108, cnt) - return decode_src(v) +def _fmt_v16(v: int, base: int = 256, hi_thresh: int = 384) -> str: + return f"v{(v - base) & 0x7f}.{_hl(v, hi_thresh)}" -def _fmt_src64(v: int) -> str: - """Format 64-bit source (VGPR pair, SGPR pair, or special pair).""" - return _fmt_src_n(v, 2) - -def _parse_sop_sizes(op_name: str) -> tuple[int, ...]: - """Parse dst and src sizes from SOP instruction name. 
Returns (dst_cnt, src0_cnt) or (dst_cnt, src0_cnt, src1_cnt).""" - if op_name in ('s_bitset0_b64', 's_bitset1_b64'): return (2, 1) - if op_name in ('s_lshl_b64', 's_lshr_b64', 's_ashr_i64', 's_bfe_u64', 's_bfe_i64'): return (2, 2, 1) - if op_name in ('s_bfm_b64',): return (2, 1, 1) - # SOPC: s_bitcmp0_b64, s_bitcmp1_b64 - 64-bit src0, 32-bit src1 (bit index) - if op_name in ('s_bitcmp0_b64', 's_bitcmp1_b64'): return (1, 2, 1) - if m := re.search(r'_(b|i|u)(32|64)_(b|i|u)(32|64)$', op_name): - return (2 if m.group(2) == '64' else 1, 2 if m.group(4) == '64' else 1) - if m := re.search(r'_(b|i|u)(32|64)$', op_name): - sz = 2 if m.group(2) == '64' else 1 - return (sz, sz) - return (1, 1) - -# Waitcnt helpers (RDNA3 format: bits 15:10=vmcnt, bits 9:4=lgkmcnt, bits 3:0=expcnt) def waitcnt(vmcnt: int = 0x3f, expcnt: int = 0x7, lgkmcnt: int = 0x3f) -> int: return (expcnt & 0x7) | ((lgkmcnt & 0x3f) << 4) | ((vmcnt & 0x3f) << 10) -def decode_waitcnt(val: int) -> tuple[int, int, int]: - return (val >> 10) & 0x3f, val & 0xf, (val >> 4) & 0x3f # vmcnt, expcnt, lgkmcnt -# VOP3SD opcodes (shared encoding with VOP3 but different field layout) -# Note: opcodes 0-255 are VOPC promoted to VOP3 - never treat as VOP3SD -VOP3SD_OPCODES = {288, 289, 290, 764, 765, 766, 767, 768, 769, 770} +def _has(op: str, *subs) -> bool: return any(s in op for s in subs) +def _is16(op: str) -> bool: return _has(op, 'f16', 'i16', 'u16', 'b16') and not _has(op, '_f32', '_i32') +def _is64(op: str) -> bool: return _has(op, 'f64', 'i64', 'u64', 'b64') +def _omod(v: int) -> str: return {1: " mul:2", 2: " mul:4", 3: " div:2"}.get(v, "") +def _mods(*pairs) -> str: return " ".join(m for c, m in pairs if c) +def _fmt_bits(label: str, val: int, count: int) -> str: return f"{label}:[{','.join(str((val >> i) & 1) for i in range(count))}]" -# Disassembler -def disasm(inst: Inst) -> str: - op_val = unwrap(inst._values.get('op', 0)) - cls_name = inst.__class__.__name__ - # VOP3 and VOP3SD share encoding - check opcode to determine which - is_vop3sd = cls_name == 'VOP3' and op_val in VOP3SD_OPCODES - try: - from extra.assembly.amd.autogen import rdna3 as autogen - if is_vop3sd: - op_name = autogen.VOP3SDOp(op_val).name.lower() - else: - op_name = getattr(autogen, f"{cls_name}Op")(op_val).name.lower() if hasattr(autogen, f"{cls_name}Op") else f"op_{op_val}" - except (ValueError, KeyError): op_name = f"op_{op_val}" - def fmt_src(v): return f"0x{inst._literal:x}" if v == 255 and inst._literal is not None else decode_src(v) +def _vop3_src(inst, v: int, neg: int, abs_: int, hi: int, n: int, f16: bool, any_hi: bool) -> str: + """Format VOP3 source operand with modifiers.""" + if n > 1: s = _fmt_src(v, n) + elif f16 and v >= 256: s = f"v{v - 256}.h" if hi else (f"v{v - 256}.l" if any_hi else inst.lit(v)) + else: s = inst.lit(v) + if abs_: s = f"|{s}|" + return f"-{s}" if neg else s - # VOP1 - if cls_name == 'VOP1': - vdst, src0 = unwrap(inst._values['vdst']), unwrap(inst._values['src0']) - if op_name == 'v_nop': return 'v_nop' - if op_name == 'v_pipeflush': return 'v_pipeflush' - parts = op_name.split('_') - is_16bit_dst = any(p in _16BIT_TYPES for p in parts[-2:-1]) or (len(parts) >= 2 and parts[-1] in _16BIT_TYPES and 'cvt' not in op_name) - is_16bit_src = parts[-1] in _16BIT_TYPES and 'sat_pk' not in op_name - _F64_OPS = ('v_ceil_f64', 'v_floor_f64', 'v_fract_f64', 'v_frexp_mant_f64', 'v_rcp_f64', 'v_rndne_f64', 'v_rsq_f64', 'v_sqrt_f64', 'v_trunc_f64') - is_f64_dst = op_name in _F64_OPS or op_name in ('v_cvt_f64_f32', 'v_cvt_f64_i32', 
'v_cvt_f64_u32') - is_f64_src = op_name in _F64_OPS or op_name in ('v_cvt_f32_f64', 'v_cvt_i32_f64', 'v_cvt_u32_f64', 'v_frexp_exp_i32_f64') - if op_name == 'v_readfirstlane_b32': - return f"v_readfirstlane_b32 {decode_src(vdst)}, v{src0 - 256 if src0 >= 256 else src0}" - dst_str = _vreg(vdst, 2) if is_f64_dst else f"v{vdst & 0x7f}.{'h' if vdst >= 128 else 'l'}" if is_16bit_dst else f"v{vdst}" - src_str = _fmt_src64(src0) if is_f64_src else f"v{(src0 - 256) & 0x7f}.{'h' if src0 >= 384 else 'l'}" if is_16bit_src and src0 >= 256 else fmt_src(src0) - return f"{op_name}_e32 {dst_str}, {src_str}" +def _opsel_str(opsel: int, n: int, need: bool, is16_d: bool) -> str: + """Format op_sel modifier string.""" + if not need: return "" + if is16_d and (opsel & 8): return f" op_sel:[1,1,1{',1' if n == 3 else ''}]" + if n == 3: return f" op_sel:[{opsel & 1},{(opsel >> 1) & 1},{(opsel >> 2) & 1},{(opsel >> 3) & 1}]" + return f" op_sel:[{opsel & 1},{(opsel >> 1) & 1},{(opsel >> 2) & 1}]" - # VOP2 - if cls_name == 'VOP2': - vdst, src0_raw, vsrc1 = unwrap(inst._values['vdst']), unwrap(inst._values['src0']), unwrap(inst._values['vsrc1']) - suffix = "" if op_name == "v_dot2acc_f32_f16" else "_e32" - is_16bit_op = ('_f16' in op_name or '_i16' in op_name or '_u16' in op_name) and '_f32' not in op_name and '_i32' not in op_name and 'pk_' not in op_name - if is_16bit_op: - dst_str = f"v{vdst & 0x7f}.{'h' if vdst >= 128 else 'l'}" - src0_str = f"v{(src0_raw - 256) & 0x7f}.{'h' if src0_raw >= 384 else 'l'}" if src0_raw >= 256 else fmt_src(src0_raw) - vsrc1_str = f"v{vsrc1 & 0x7f}.{'h' if vsrc1 >= 128 else 'l'}" - else: - dst_str, src0_str, vsrc1_str = f"v{vdst}", fmt_src(src0_raw), f"v{vsrc1}" - return f"{op_name}{suffix} {dst_str}, {src0_str}, {vsrc1_str}" + (", vcc_lo" if op_name == "v_cndmask_b32" else "") +# ═══════════════════════════════════════════════════════════════════════════════ +# DISASSEMBLER +# ═══════════════════════════════════════════════════════════════════════════════ - # VOPC - if cls_name == 'VOPC': - src0, vsrc1 = unwrap(inst._values['src0']), unwrap(inst._values['vsrc1']) - is_64bit = any(x in op_name for x in ('f64', 'i64', 'u64')) - is_64bit_vsrc1 = is_64bit and 'class' not in op_name - is_16bit = any(x in op_name for x in ('_f16', '_i16', '_u16')) and 'f32' not in op_name - is_cmpx = op_name.startswith('v_cmpx') # VOPCX writes to exec, no vcc destination - src0_str = _fmt_src64(src0) if is_64bit else f"v{(src0 - 256) & 0x7f}.{'h' if src0 >= 384 else 'l'}" if is_16bit and src0 >= 256 else fmt_src(src0) - vsrc1_str = _vreg(vsrc1, 2) if is_64bit_vsrc1 else f"v{vsrc1 & 0x7f}.{'h' if vsrc1 >= 128 else 'l'}" if is_16bit else f"v{vsrc1}" - return f"{op_name}_e32 {src0_str}, {vsrc1_str}" if is_cmpx else f"{op_name}_e32 vcc_lo, {src0_str}, {vsrc1_str}" +def _disasm_vop1(inst: VOP1) -> str: + op = VOP1Op(inst.op) + if op in (VOP1Op.V_NOP, VOP1Op.V_PIPEFLUSH): return op.name.lower() + F64_OPS = {VOP1Op.V_CEIL_F64, VOP1Op.V_FLOOR_F64, VOP1Op.V_FRACT_F64, VOP1Op.V_FREXP_MANT_F64, VOP1Op.V_RCP_F64, VOP1Op.V_RNDNE_F64, VOP1Op.V_RSQ_F64, VOP1Op.V_SQRT_F64, VOP1Op.V_TRUNC_F64} + is_f64_d = op in F64_OPS or op in (VOP1Op.V_CVT_F64_F32, VOP1Op.V_CVT_F64_I32, VOP1Op.V_CVT_F64_U32) + is_f64_s = op in F64_OPS or op in (VOP1Op.V_CVT_F32_F64, VOP1Op.V_CVT_I32_F64, VOP1Op.V_CVT_U32_F64, VOP1Op.V_FREXP_EXP_I32_F64) + name = op.name.lower() + parts = name.split('_') + is_16d = any(p in ('f16','i16','u16','b16') for p in parts[-2:-1]) or (len(parts) >= 2 and parts[-1] in ('f16','i16','u16','b16') and 'cvt' not in 
name) + is_16s = parts[-1] in ('f16','i16','u16','b16') and 'sat_pk' not in name + if op == VOP1Op.V_READFIRSTLANE_B32: return f"v_readfirstlane_b32 {decode_src(inst.vdst)}, v{inst.src0 - 256 if inst.src0 >= 256 else inst.src0}" + dst = _vreg(inst.vdst, 2) if is_f64_d else _fmt_v16(inst.vdst, 0, 128) if is_16d else f"v{inst.vdst}" + src = _fmt_src(inst.src0, 2) if is_f64_s else _fmt_v16(inst.src0) if is_16s and inst.src0 >= 256 else inst.lit(inst.src0) + return f"{name}_e32 {dst}, {src}" - # SOPP - if cls_name == 'SOPP': - simm16 = unwrap(inst._values.get('simm16', 0)) - # No-operand instructions (simm16 is ignored) - no_imm_ops = ('s_endpgm', 's_barrier', 's_wakeup', 's_icache_inv', 's_ttracedata', 's_ttracedata_imm', - 's_wait_idle', 's_endpgm_saved', 's_code_end', 's_endpgm_ordered_ps_done') - if op_name in no_imm_ops: return op_name - if op_name == 's_waitcnt': - vmcnt, expcnt, lgkmcnt = decode_waitcnt(simm16) - parts = [] - if vmcnt != 0x3f: parts.append(f"vmcnt({vmcnt})") - if expcnt != 0x7: parts.append(f"expcnt({expcnt})") - if lgkmcnt != 0x3f: parts.append(f"lgkmcnt({lgkmcnt})") - return f"s_waitcnt {' '.join(parts)}" if parts else "s_waitcnt 0" - if op_name == 's_delay_alu': - dep_names = ['VALU_DEP_1','VALU_DEP_2','VALU_DEP_3','VALU_DEP_4','TRANS32_DEP_1','TRANS32_DEP_2','TRANS32_DEP_3','FMA_ACCUM_CYCLE_1','SALU_CYCLE_1','SALU_CYCLE_2','SALU_CYCLE_3'] - skip_names = ['SAME','NEXT','SKIP_1','SKIP_2','SKIP_3','SKIP_4'] - id0, skip, id1 = simm16 & 0xf, (simm16 >> 4) & 0x7, (simm16 >> 7) & 0xf - def dep_name(v): return dep_names[v-1] if 0 < v <= len(dep_names) else str(v) - parts = [f"instid0({dep_name(id0)})"] if id0 else [] - if skip: parts.append(f"instskip({skip_names[skip]})") - if id1: parts.append(f"instid1({dep_name(id1)})") - return f"s_delay_alu {' | '.join(p for p in parts if p)}" if parts else "s_delay_alu 0" - if op_name.startswith('s_cbranch') or op_name.startswith('s_branch'): - return f"{op_name} {simm16}" - # Most SOPP ops require immediate (s_nop, s_setkill, s_sethalt, s_sleep, s_setprio, s_sendmsg*, etc.) 
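# (Worked example of the packing kept in waitcnt() above: with the defaults expcnt=0x7 and
# lgkmcnt=0x3f, waitcnt(vmcnt=0) packs to 0x7 | (0x3f << 4) | (0 << 10) == 0x3f7, and the
# disassembly prints only the non-maximal counter, i.e. "s_waitcnt vmcnt(0)".)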
- return f"{op_name} 0x{simm16:x}"

+def _disasm_vop2(inst: VOP2) -> str:
+ op = VOP2Op(inst.op)
+ name = op.name.lower()
+ suf = "" if op == VOP2Op.V_DOT2ACC_F32_F16 else "_e32"
+ is16 = _is16(name) and 'pk_' not in name
+ # fmaak: dst = src0 * vsrc1 + K, fmamk: dst = src0 * K + vsrc1
+ if op in (VOP2Op.V_FMAAK_F32, VOP2Op.V_FMAAK_F16): return f"{name}{suf} v{inst.vdst}, {inst.lit(inst.src0)}, v{inst.vsrc1}, 0x{inst._literal:x}"
+ if op in (VOP2Op.V_FMAMK_F32, VOP2Op.V_FMAMK_F16): return f"{name}{suf} v{inst.vdst}, {inst.lit(inst.src0)}, 0x{inst._literal:x}, v{inst.vsrc1}"
+ if is16: return f"{name}{suf} {_fmt_v16(inst.vdst, 0, 128)}, {_fmt_v16(inst.src0) if inst.src0 >= 256 else inst.lit(inst.src0)}, {_fmt_v16(inst.vsrc1, 0, 128)}"
+ return f"{name}{suf} v{inst.vdst}, {inst.lit(inst.src0)}, v{inst.vsrc1}" + (", vcc_lo" if op == VOP2Op.V_CNDMASK_B32 else "")

- # SMEM
- if cls_name == 'SMEM':
- if op_name in ('s_gl1_inv', 's_dcache_inv'): return op_name
- sdata, sbase, soffset, offset = unwrap(inst._values['sdata']), unwrap(inst._values['sbase']), unwrap(inst._values['soffset']), unwrap(inst._values.get('offset', 0))
- glc, dlc = unwrap(inst._values.get('glc', 0)), unwrap(inst._values.get('dlc', 0))
- # Format offset: "soffset offset:X" if both, "0x{offset:x}" if only imm, or decode_src(soffset)
- off_str = f"{decode_src(soffset)} offset:0x{offset:x}" if offset and soffset != 124 else f"0x{offset:x}" if offset else decode_src(soffset)
- sbase_idx, sbase_cnt = sbase * 2, 4 if (8 <= op_val <= 12 or op_name == 's_atc_probe_buffer') else 2
- sbase_str = _fmt_ssrc(sbase_idx, sbase_cnt) if sbase_cnt == 2 else _sreg(sbase_idx, sbase_cnt) if sbase_idx <= 105 else _reg("ttmp", sbase_idx - 108, sbase_cnt)
- if op_name in ('s_atc_probe', 's_atc_probe_buffer'): return f"{op_name} {sdata}, {sbase_str}, {off_str}"
- width = {0:1, 1:2, 2:4, 3:8, 4:16, 8:1, 9:2, 10:4, 11:8, 12:16}.get(op_val, 1)
- mods = [m for m in ["glc" if glc else "", "dlc" if dlc else ""] if m]
- return f"{op_name} {_fmt_sdst(sdata, width)}, {sbase_str}, {off_str}" + (" " + " ".join(mods) if mods else "")

+VOPC_CLASS = {VOPCOp.V_CMP_CLASS_F16, VOPCOp.V_CMP_CLASS_F32, VOPCOp.V_CMP_CLASS_F64,
+ VOPCOp.V_CMPX_CLASS_F16, VOPCOp.V_CMPX_CLASS_F32, VOPCOp.V_CMPX_CLASS_F64}

- # FLAT
- if cls_name == 'FLAT':
- vdst, addr, data, saddr, offset, seg = [unwrap(inst._values.get(f, 0)) for f in ['vdst', 'addr', 'data', 'saddr', 'offset', 'seg']]
- instr = f"{['flat', 'scratch', 'global'][seg] if seg < 3 else 'flat'}_{op_name.split('_', 1)[1] if '_' in op_name else op_name}"
- width = {'b32':1, 'b64':2, 'b96':3, 'b128':4, 'u8':1, 'i8':1, 'u16':1, 'i16':1}.get(op_name.split('_')[-1], 1)
- addr_str = _vreg(addr, 2) if saddr == 0x7F else _vreg(addr)
- saddr_str = "" if saddr == 0x7F else f", {_sreg(saddr, 2)}" if saddr < 106 else ", off" if saddr == 124 else f", {decode_src(saddr)}"
- off_str = f" offset:{offset}" if offset else ""
- vdata_str = _vreg(data if 'store' in op_name else vdst, width)
- return f"{instr} {addr_str}, {vdata_str}{saddr_str}{off_str}" if 'store' in op_name else f"{instr} {vdata_str}, {addr_str}{saddr_str}{off_str}"

+def _disasm_vopc(inst: VOPC) -> str:
+ op = VOPCOp(inst.op)
+ name = op.name.lower()
+ is64, is16 = _is64(name), _is16(name)
+ s0 = _fmt_src(inst.src0, 2) if is64 else _fmt_v16(inst.src0) if is16 and inst.src0 >= 256 else inst.lit(inst.src0)
+ s1 = _vreg(inst.vsrc1, 2) if is64 and op not in VOPC_CLASS else _fmt_v16(inst.vsrc1, 0, 128) if is16 else f"v{inst.vsrc1}"
+ return f"{name}_e32 {s0}, {s1}" if op.value >= 128 else f"{name}_e32 vcc_lo, {s0}, {s1}"

- # VOP3: vector ops with modifiers (can be 1, 2, or 3 sources depending on opcode range)
- if cls_name == 'VOP3':
- # Handle VOP3SD opcodes (same encoding, different field layout)
- if is_vop3sd:
- vdst = unwrap(inst._values.get('vdst', 0))
- # VOP3SD: sdst is at bits [14:8], but VOP3 decodes opsel at [14:11], abs at [10:8], clmp at [15]
- # We need to reconstruct sdst from these fields
- opsel_raw = unwrap(inst._values.get('opsel', 0))
- abs_raw = unwrap(inst._values.get('abs', 0))
- clmp_raw = unwrap(inst._values.get('clmp', 0))
- sdst = (clmp_raw << 7) | (opsel_raw << 3) | abs_raw
- src0, src1, src2 = [unwrap(inst._values.get(f, 0)) for f in ('src0', 'src1', 'src2')]
- neg = unwrap(inst._values.get('neg', 0))
- omod = unwrap(inst._values.get('omod', 0))
- omod_str = {1: " mul:2", 2: " mul:4", 3: " div:2"}.get(omod, "")
- is_f64 = 'f64' in op_name
- # v_mad_i64_i32/v_mad_u64_u32: 64-bit dst and src2, 32-bit src0/src1
- is_mad64 = 'mad_i64_i32' in op_name or 'mad_u64_u32' in op_name
- def fmt_sd_src(v, neg_bit, is_64bit=False):
- s = _fmt_src64(v) if (is_64bit or is_f64) else fmt_src(v)
- return f"-{s}" if neg_bit else s
- src0_str, src1_str = fmt_sd_src(src0, neg & 1), fmt_sd_src(src1, neg & 2)
- src2_str = fmt_sd_src(src2, neg & 4, is_mad64)
- dst_str = _vreg(vdst, 2) if (is_f64 or is_mad64) else f"v{vdst}"
- sdst_str = _fmt_sdst(sdst, 1)
- # v_add_co_u32, v_sub_co_u32, v_subrev_co_u32 only use 2 sources
- if op_name in ('v_add_co_u32', 'v_sub_co_u32', 'v_subrev_co_u32'):
- return f"{op_name} {dst_str}, {sdst_str}, {src0_str}, {src1_str}"
- # v_add_co_ci_u32, v_sub_co_ci_u32, v_subrev_co_ci_u32 use 3 sources (src2 is carry-in)
- if op_name in ('v_add_co_ci_u32', 'v_sub_co_ci_u32', 'v_subrev_co_ci_u32'):
- return f"{op_name} {dst_str}, {sdst_str}, {src0_str}, {src1_str}, {src2_str}"
- # v_div_scale uses 3 sources
- return f"{op_name} {dst_str}, {sdst_str}, {src0_str}, {src1_str}, {src2_str}" + omod_str

+NO_ARG_SOPP = {SOPPOp.S_ENDPGM, SOPPOp.S_BARRIER, SOPPOp.S_WAKEUP, SOPPOp.S_ICACHE_INV,
+ SOPPOp.S_WAIT_IDLE, SOPPOp.S_ENDPGM_SAVED, SOPPOp.S_CODE_END, SOPPOp.S_ENDPGM_ORDERED_PS_DONE}

- vdst = unwrap(inst._values.get('vdst', 0))
- src0, src1, src2 = [unwrap(inst._values.get(f, 0)) for f in ('src0', 'src1', 'src2')]
- neg, abs_, clmp = unwrap(inst._values.get('neg', 0)), unwrap(inst._values.get('abs', 0)), unwrap(inst._values.get('clmp', 0))
- opsel = unwrap(inst._values.get('opsel', 0))
- # Check if 64-bit op (needs register pairs)
- is_f64 = 'f64' in op_name or 'i64' in op_name or 'u64' in op_name or 'b64' in op_name
- # v_cmp_class_* has 64-bit src0 but 32-bit src1 (class mask)
- is_class = 'class' in op_name
- # Shift ops: v_*rev_*64 have 32-bit shift amount (src0), 64-bit value (src1)
- is_shift64 = 'rev' in op_name and '64' in op_name and op_name.startswith('v_')
- # v_ldexp_f64: 64-bit src0 (mantissa), 32-bit src1 (exponent)
- is_ldexp64 = op_name == 'v_ldexp_f64'
- # v_trig_preop_f64: 64-bit dst/src0, 32-bit src1 (exponent/scale)
- is_trig_preop = op_name == 'v_trig_preop_f64'
- # v_readlane_b32: destination is SGPR (despite vdst field)
- is_readlane = op_name == 'v_readlane_b32'
- # SAD/QSAD/MQSAD instructions have mixed sizes
- # v_qsad_pk_u16_u8, v_mqsad_pk_u16_u8: 64-bit dst/src0/src2, 32-bit src1
- # v_mqsad_u32_u8: 128-bit (4 reg) dst/src2, 64-bit src0, 32-bit src1
- is_sad64 = any(x in op_name for x in ('qsad_pk', 'mqsad_pk'))
- is_mqsad_u32 = 'mqsad_u32' in op_name
- # Detect 16-bit and 64-bit operand sizes for various instruction
patterns - if 'cvt_pk' in op_name: - is_f16_dst, is_f16_src, is_f16_src2 = False, op_name.endswith('16'), False - elif m := re.match(r'v_(?:cvt|frexp_exp)_([a-z0-9_]+)_([a-z0-9]+)', op_name): - dst_type, src_type = m.group(1), m.group(2) - is_f16_dst, is_f16_src, is_f16_src2 = _is_16bit(dst_type), _is_16bit(src_type), _is_16bit(src_type) - is_f64_dst, is_f64_src, is_f64 = '64' in dst_type, '64' in src_type, False - elif re.match(r'v_mad_[iu]32_[iu]16', op_name): - is_f16_dst, is_f16_src, is_f16_src2 = False, True, False # 32-bit dst, 16-bit src0/src1, 32-bit src2 - elif 'pack_b32' in op_name: - is_f16_dst, is_f16_src, is_f16_src2 = False, True, True # 32-bit dst, 16-bit sources - else: - is_16bit_op = any(x in op_name for x in _16BIT_TYPES) and not any(x in op_name for x in ('dot2', 'pk_', 'sad', 'msad', 'qsad', 'mqsad')) - is_f16_dst = is_f16_src = is_f16_src2 = is_16bit_op - # Check if any opsel bit is set (any operand uses .h) - if so, we need explicit .l for low-half - any_hi = opsel != 0 - def fmt_vop3_src(v, neg_bit, abs_bit, hi_bit=False, reg_cnt=1, is_16=False): - s = _fmt_src_n(v, reg_cnt) if reg_cnt > 1 else f"v{v - 256}.h" if is_16 and v >= 256 and hi_bit else f"v{v - 256}.l" if is_16 and v >= 256 and any_hi else fmt_src(v) - if abs_bit: s = f"|{s}|" - return f"-{s}" if neg_bit else s - # Determine register count for each source (check for cvt-specific 64-bit flags first) - is_src0_64 = locals().get('is_f64_src', is_f64 and not is_shift64) or is_sad64 or is_mqsad_u32 - is_src1_64 = is_f64 and not is_class and not is_ldexp64 and not is_trig_preop - src0_cnt = 2 if is_src0_64 else 1 - src1_cnt = 2 if is_src1_64 else 1 - src2_cnt = 4 if is_mqsad_u32 else 2 if (is_f64 or is_sad64) else 1 - src0_str = fmt_vop3_src(src0, neg & 1, abs_ & 1, opsel & 1, src0_cnt, is_f16_src) - src1_str = fmt_vop3_src(src1, neg & 2, abs_ & 2, opsel & 2, src1_cnt, is_f16_src) - src2_str = fmt_vop3_src(src2, neg & 4, abs_ & 4, opsel & 4, src2_cnt, is_f16_src2) - # Format destination - for 16-bit ops, use .h/.l suffix; readlane uses SGPR dest - is_dst_64 = locals().get('is_f64_dst', is_f64) or is_sad64 - dst_cnt = 4 if is_mqsad_u32 else 2 if is_dst_64 else 1 - if is_readlane: - dst_str = _fmt_sdst(vdst, 1) - elif dst_cnt > 1: - dst_str = _vreg(vdst, dst_cnt) - elif is_f16_dst: - dst_str = f"v{vdst}.h" if (opsel & 8) else f"v{vdst}.l" if any_hi else f"v{vdst}" - else: - dst_str = f"v{vdst}" - clamp_str = " clamp" if clmp else "" - omod = unwrap(inst._values.get('omod', 0)) - omod_str = {1: " mul:2", 2: " mul:4", 3: " div:2"}.get(omod, "") - # op_sel for non-VGPR sources (when opsel bits are set but source is not a VGPR) - # For 16-bit ops with VGPR sources, opsel is encoded in .h/.l suffix - # For non-VGPR sources or non-16-bit ops, we need explicit op_sel - has_nonvgpr_opsel = (src0 < 256 and (opsel & 1)) or (src1 < 256 and (opsel & 2)) or (src2 < 256 and (opsel & 4)) - need_opsel = has_nonvgpr_opsel or (opsel and not is_f16_src) - # Helper to format opsel string based on source count - def fmt_opsel(num_src): - if not need_opsel: return "" - # When dst is .h (for 16-bit ops) and non-VGPR sources have opsel, use all 1s - if is_f16_dst and (opsel & 8): # dst is .h - return f" op_sel:[1,1,1{',1' if num_src == 3 else ''}]" - # Otherwise output actual opsel values - if num_src == 3: - return f" op_sel:[{opsel & 1},{(opsel >> 1) & 1},{(opsel >> 2) & 1},{(opsel >> 3) & 1}]" - return f" op_sel:[{opsel & 1},{(opsel >> 1) & 1},{(opsel >> 2) & 1}]" - # Determine number of sources based on opcode range: - # 0-255: VOPC 
promoted (comparison, 2 src, sdst) - # 256-383: VOP2 promoted (2 src) - # 384-511: VOP1 promoted (1 src) - # 512+: Native VOP3 (2 or 3 src depending on instruction) - if op_val < 256: # VOPC promoted - # VOPCX (v_cmpx_*) writes to exec, no explicit destination - if op_name.startswith('v_cmpx'): - return f"{op_name}_e64 {src0_str}, {src1_str}" - return f"{op_name}_e64 {_fmt_sdst(vdst, 1)}, {src0_str}, {src1_str}" - elif op_val < 384: # VOP2 promoted - # v_cndmask_b32 in VOP3 format has 3 sources (src2 is mask selector) - if 'cndmask' in op_name: - return f"{op_name}_e64 {dst_str}, {src0_str}, {src1_str}, {src2_str}" + fmt_opsel(3) + clamp_str + omod_str - return f"{op_name}_e64 {dst_str}, {src0_str}, {src1_str}" + fmt_opsel(2) + clamp_str + omod_str - elif op_val < 512: # VOP1 promoted - if op_name in ('v_nop', 'v_pipeflush'): return f"{op_name}_e64" - return f"{op_name}_e64 {dst_str}, {src0_str}" + fmt_opsel(1) + clamp_str + omod_str - else: # Native VOP3 - determine 2 vs 3 sources based on instruction name - # 3-source ops: fma, mad, min3, max3, med3, div_fixup, div_fmas, sad, msad, qsad, mqsad, lerp, alignbit/byte, cubeid/sc/tc/ma, bfe, bfi, perm_b32, permlane, cndmask - # Note: v_writelane_b32 is 2-src (src0, src1 with vdst as 3rd operand - read-modify-write) - is_3src = any(x in op_name for x in ('fma', 'mad', 'min3', 'max3', 'med3', 'div_fix', 'div_fmas', 'sad', 'lerp', 'align', 'cube', - 'bfe', 'bfi', 'perm_b32', 'permlane', 'cndmask', 'xor3', 'or3', 'add3', 'lshl_or', 'and_or', 'lshl_add', - 'add_lshl', 'xad', 'maxmin', 'minmax', 'dot2', 'cvt_pk_u8', 'mullit')) - if is_3src: - return f"{op_name} {dst_str}, {src0_str}, {src1_str}, {src2_str}" + fmt_opsel(3) + clamp_str + omod_str - return f"{op_name} {dst_str}, {src0_str}, {src1_str}" + fmt_opsel(2) + clamp_str + omod_str +def _disasm_sopp(inst: SOPP) -> str: + op, name = SOPPOp(inst.op), SOPPOp(inst.op).name.lower() + if op in NO_ARG_SOPP: return name + if op == SOPPOp.S_WAITCNT: + vm, exp, lgkm = (inst.simm16 >> 10) & 0x3f, inst.simm16 & 0xf, (inst.simm16 >> 4) & 0x3f + p = [f"vmcnt({vm})" if vm != 0x3f else "", f"expcnt({exp})" if exp != 7 else "", f"lgkmcnt({lgkm})" if lgkm != 0x3f else ""] + return f"s_waitcnt {' '.join(x for x in p if x) or '0'}" + if op == SOPPOp.S_DELAY_ALU: + deps, skips = ['VALU_DEP_1','VALU_DEP_2','VALU_DEP_3','VALU_DEP_4','TRANS32_DEP_1','TRANS32_DEP_2','TRANS32_DEP_3','FMA_ACCUM_CYCLE_1','SALU_CYCLE_1','SALU_CYCLE_2','SALU_CYCLE_3'], ['SAME','NEXT','SKIP_1','SKIP_2','SKIP_3','SKIP_4'] + id0, skip, id1 = inst.simm16 & 0xf, (inst.simm16 >> 4) & 0x7, (inst.simm16 >> 7) & 0xf + dep = lambda v: deps[v-1] if 0 < v <= len(deps) else str(v) + p = [f"instid0({dep(id0)})" if id0 else "", f"instskip({skips[skip]})" if skip else "", f"instid1({dep(id1)})" if id1 else ""] + return f"s_delay_alu {' | '.join(x for x in p if x) or '0'}" + return f"{name} {inst.simm16}" if name.startswith(('s_cbranch', 's_branch')) else f"{name} 0x{inst.simm16:x}" - # VOP3SD: 3-source with scalar destination (v_div_scale_*, v_add_co_u32, v_mad_*64_*32, etc.) 
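The VOP3/VOP3SD field aliasing described in the removed comment above (sdst occupies the bits a plain VOP3 decode reads as opsel/abs, with clmp as the top bit) can be checked in isolation. A minimal roundtrip sketch, assuming the bit positions stated in that comment; vop3_view and sdst_from_vop3_fields are illustrative names, not code from the patch:

def vop3_view(word1: int) -> tuple[int, int, int]:
  # decode the second dword through the VOP3 lens: abs at [10:8], opsel at [14:11], clmp at [15]
  return (word1 >> 15) & 0x1, (word1 >> 11) & 0xf, (word1 >> 8) & 0x7

def sdst_from_vop3_fields(clmp: int, opsel: int, abs_: int) -> int:
  # same reconstruction both the old and new decoders perform for VOP3SD opcodes
  return (clmp << 7) | (opsel << 3) | abs_

vcc_lo = 106                    # SGPR encoding for vcc_lo
word1 = vcc_lo << 8             # sdst field of a hypothetical v_add_co_u32 second dword
assert sdst_from_vop3_fields(*vop3_view(word1)) == vcc_lo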
- if cls_name == 'VOP3SD':
- vdst, sdst = unwrap(inst._values.get('vdst', 0)), unwrap(inst._values.get('sdst', 0))
- src0, src1, src2 = [unwrap(inst._values.get(f, 0)) for f in ('src0', 'src1', 'src2')]
- neg, omod, clmp = unwrap(inst._values.get('neg', 0)), unwrap(inst._values.get('omod', 0)), unwrap(inst._values.get('clmp', 0))
- is_f64, is_mad64 = 'f64' in op_name, 'mad_i64_i32' in op_name or 'mad_u64_u32' in op_name
- def fmt_neg(v, neg_bit, is_64=False): return f"-{_fmt_src64(v) if (is_64 or is_f64) else fmt_src(v)}" if neg_bit else _fmt_src64(v) if (is_64 or is_f64) else fmt_src(v)
- srcs = [fmt_neg(src0, neg & 1), fmt_neg(src1, neg & 2), fmt_neg(src2, neg & 4, is_mad64)]
- dst_str, sdst_str = _vreg(vdst, 2) if (is_f64 or is_mad64) else f"v{vdst}", _fmt_sdst(sdst, 1)
- clamp_str, omod_str = " clamp" if clmp else "", {1: " mul:2", 2: " mul:4", 3: " div:2"}.get(omod, "")
- is_2src = op_name in ('v_add_co_u32', 'v_sub_co_u32', 'v_subrev_co_u32')
- suffix = "_e64" if op_name.startswith('v_') and 'co_' in op_name else ""
- return f"{op_name}{suffix} {dst_str}, {sdst_str}, {', '.join(srcs[:2] if is_2src else srcs)}" + clamp_str + omod_str

+def _disasm_smem(inst: SMEM) -> str:
+ op = SMEMOp(inst.op)
+ name = op.name.lower()
+ if op in (SMEMOp.S_GL1_INV, SMEMOp.S_DCACHE_INV): return name
+ off_s = f"{decode_src(inst.soffset)} offset:0x{inst.offset:x}" if inst.offset and inst.soffset != 124 else f"0x{inst.offset:x}" if inst.offset else decode_src(inst.soffset)
+ sbase_idx, sbase_count = inst.sbase * 2, 4 if (8 <= inst.op <= 12 or name == 's_atc_probe_buffer') else 2
+ sbase_str = _fmt_src(sbase_idx, sbase_count) if sbase_count == 2 else _sreg(sbase_idx, sbase_count) if sbase_idx <= 105 else _reg("ttmp", sbase_idx - 108, sbase_count)
+ if name in ('s_atc_probe', 's_atc_probe_buffer'): return f"{name} {inst.sdata}, {sbase_str}, {off_s}"
+ width = {0:1, 1:2, 2:4, 3:8, 4:16, 8:1, 9:2, 10:4, 11:8, 12:16}.get(inst.op, 1)
+ return f"{name} {_fmt_sdst(inst.sdata, width)}, {sbase_str}, {off_s}" + _mods((inst.glc, " glc"), (inst.dlc, " dlc"))

- # VOPD: dual-issue instructions
- if cls_name == 'VOPD':
- from extra.assembly.amd.autogen import rdna3 as autogen
- opx, opy, vdstx, vdsty_enc = [unwrap(inst._values.get(f, 0)) for f in ('opx', 'opy', 'vdstx', 'vdsty')]
- srcx0, vsrcx1, srcy0, vsrcy1 = [unwrap(inst._values.get(f, 0)) for f in ('srcx0', 'vsrcx1', 'srcy0', 'vsrcy1')]
- literal = inst._literal if hasattr(inst, '_literal') and inst._literal else unwrap(inst._values.get('literal', None))
- vdsty = (vdsty_enc << 1) | ((vdstx & 1) ^ 1) # Decode vdsty
- def fmt_vopd(op, vdst, src0, vsrc1, include_lit):
- try: name = autogen.VOPDOp(op).name.lower()
- except (ValueError, KeyError): name = f"op_{op}"
- lit_str = f", 0x{literal:x}" if include_lit and literal is not None and ('fmaak' in name or 'fmamk' in name) else ""
- return f"{name} v{vdst}, {fmt_src(src0)}{lit_str}" if 'mov' in name else f"{name} v{vdst}, {fmt_src(src0)}, v{vsrc1}{lit_str}"
- # fmaak/fmamk: both X and Y can use the shared literal
- x_needs_lit = 'fmaak' in autogen.VOPDOp(opx).name.lower() or 'fmamk' in autogen.VOPDOp(opx).name.lower()
- y_needs_lit = 'fmaak' in autogen.VOPDOp(opy).name.lower() or 'fmamk' in autogen.VOPDOp(opy).name.lower()
- return f"{fmt_vopd(opx, vdstx, srcx0, vsrcx1, x_needs_lit)} :: {fmt_vopd(opy, vdsty, srcy0, vsrcy1, y_needs_lit)}"

+def _disasm_flat(inst: FLAT) -> str:
+ name = FLATOp(inst.op).name.lower()
+ seg = ['flat', 'scratch', 'global'][inst.seg] if inst.seg < 3 else 'flat'
+ instr = f"{seg}_{name.split('_', 1)[1] if '_' in name else name}"
+ off_val = inst.offset if seg == 'flat' else (inst.offset if inst.offset < 4096 else inst.offset - 8192)
+ suffix = name.split('_')[-1]
+ w = {'b32':1,'b64':2,'b96':3,'b128':4,'u8':1,'i8':1,'u16':1,'i16':1,'u32':1,'i32':1,'u64':2,'i64':2,'f32':1,'f64':2}.get(suffix, 1)
+ if 'cmpswap' in name: w *= 2
+ if name.endswith('_x2') or 'x2' in suffix: w = max(w, 2)
+ mods = f"{f' offset:{off_val}' if off_val else ''}{' glc' if inst.glc else ''}{' slc' if inst.slc else ''}{' dlc' if inst.dlc else ''}"
+ # saddr
+ if seg == 'flat' or inst.saddr == 0x7F: saddr_s = ""
+ elif inst.saddr == 124: saddr_s = ", off"
+ elif seg == 'scratch': saddr_s = f", {decode_src(inst.saddr)}"
+ elif inst.saddr in SPECIAL_PAIRS: saddr_s = f", {SPECIAL_PAIRS[inst.saddr]}"
+ elif 108 <= inst.saddr <= 123: saddr_s = f", {_reg('ttmp', inst.saddr - 108, 2)}"
+ else: saddr_s = f", {_sreg(inst.saddr, 2) if inst.saddr < 106 else decode_src(inst.saddr)}"
+ # addtid: no addr
+ if 'addtid' in name: return f"{instr} v{inst.data if 'store' in name else inst.vdst}{saddr_s}{mods}"
+ # addr width
+ addr_s = "off" if not inst.sve and seg == 'scratch' else _vreg(inst.addr, 1 if seg == 'scratch' or (inst.saddr not in (0x7F, 124)) else 2)
+ data_s, vdst_s = _vreg(inst.data, w), _vreg(inst.vdst, w // 2 if 'cmpswap' in name else w)
+ if 'atomic' in name:
+ return f"{instr} {vdst_s}, {addr_s}, {data_s}{saddr_s if seg != 'flat' else ''}{mods}" if inst.glc else f"{instr} {addr_s}, {data_s}{saddr_s if seg != 'flat' else ''}{mods}"
+ if 'store' in name: return f"{instr} {addr_s}, {data_s}{saddr_s}{mods}"
+ return f"{instr} {_vreg(inst.vdst, w)}, {addr_s}{saddr_s}{mods}"

- # VOP3P: packed vector ops
- if cls_name == 'VOP3P':
- vdst, clmp = unwrap(inst._values.get('vdst', 0)), unwrap(inst._values.get('clmp', 0))
- src0, src1, src2 = [unwrap(inst._values.get(f, 0)) for f in ('src0', 'src1', 'src2')]
- neg, neg_hi = unwrap(inst._values.get('neg', 0)), unwrap(inst._values.get('neg_hi', 0))
- opsel, opsel_hi, opsel_hi2 = unwrap(inst._values.get('opsel', 0)), unwrap(inst._values.get('opsel_hi', 0)), unwrap(inst._values.get('opsel_hi2', 0))
- is_wmma, is_3src = 'wmma' in op_name, any(x in op_name for x in ('fma', 'mad', 'dot', 'wmma'))
- def fmt_bits(name, val, n): return f"{name}:[{','.join(str((val >> i) & 1) for i in range(n))}]"
- # WMMA: f16/bf16 use 8-reg sources, iu8 uses 4-reg, iu4 uses 2-reg; all have 8-reg dst
- if is_wmma:
- src_cnt = 2 if 'iu4' in op_name else 4 if 'iu8' in op_name else 8
- src0_str, src1_str, src2_str = _fmt_src_n(src0, src_cnt), _fmt_src_n(src1, src_cnt), _fmt_src_n(src2, 8)
- dst_str = _vreg(vdst, 8)
- else:
- src0_str, src1_str, src2_str = _fmt_src_n(src0, 1), _fmt_src_n(src1, 1), _fmt_src_n(src2, 1)
- dst_str = f"v{vdst}"
- n = 3 if is_3src else 2
- full_opsel_hi = opsel_hi | (opsel_hi2 << 2)
- mods = [fmt_bits("op_sel", opsel, n)] if opsel else []
- if full_opsel_hi != (0b111 if is_3src else 0b11): mods.append(fmt_bits("op_sel_hi", full_opsel_hi, n))
- if neg: mods.append(fmt_bits("neg_lo", neg, n))
- if neg_hi: mods.append(fmt_bits("neg_hi", neg_hi, n))
- if clmp: mods.append("clamp")
- mod_str = " " + " ".join(mods) if mods else ""
- return f"{op_name} {dst_str}, {src0_str}, {src1_str}, {src2_str}{mod_str}" if is_3src else f"{op_name} {dst_str}, {src0_str}, {src1_str}{mod_str}"

+def _disasm_ds(inst: DS) -> str:
+ op, name = DSOp(inst.op), DSOp(inst.op).name.lower()
+ gds = " gds" if inst.gds else ""
+ off = f" offset:{inst.offset0 | (inst.offset1 << 8)}" if inst.offset0 or inst.offset1 else ""
+ off2 = f" offset0:{inst.offset0} offset1:{inst.offset1}" if inst.offset0 or inst.offset1 else ""
+ w = 4 if '128' in name else 3 if '96' in name else 2 if (name.endswith('64') or 'gs_reg' in name) else 1
+ d0, d1, dst, addr = _vreg(inst.data0, w), _vreg(inst.data1, w), _vreg(inst.vdst, w), f"v{inst.addr}"

- # VINTERP: interpolation instructions
- if cls_name == 'VINTERP':
- vdst = unwrap(inst._values.get('vdst', 0))
- src0, src1, src2 = [unwrap(inst._values.get(f, 0)) for f in ('src0', 'src1', 'src2')]
- neg, waitexp, clmp = unwrap(inst._values.get('neg', 0)), unwrap(inst._values.get('waitexp', 0)), unwrap(inst._values.get('clmp', 0))
- def fmt_neg_vi(v, neg_bit): return f"-{v}" if neg_bit else v
- srcs = [fmt_neg_vi(f"v{s - 256}" if s >= 256 else fmt_src(s), neg & (1 << i)) for i, s in enumerate([src0, src1, src2])]
- mods = [m for m in [f"wait_exp:{waitexp}" if waitexp else "", "clamp" if clmp else ""] if m]
- return f"{op_name} v{vdst}, {', '.join(srcs)}" + (" " + " ".join(mods) if mods else "")

+ if op == DSOp.DS_NOP: return name
+ if op == DSOp.DS_BVH_STACK_RTN_B32: return f"{name} v{inst.vdst}, {addr}, v{inst.data0}, {_vreg(inst.data1, 4)}{off}{gds}"
+ if 'gws_sema' in name and op != DSOp.DS_GWS_SEMA_BR: return f"{name}{off}{gds}"
+ if 'gws_' in name: return f"{name} {addr}{off}{gds}"
+ if op in (DSOp.DS_CONSUME, DSOp.DS_APPEND): return f"{name} v{inst.vdst}{off}{gds}"
+ if 'gs_reg' in name: return f"{name} {_vreg(inst.vdst, 2)}, v{inst.data0}{off}{gds}"
+ if '2addr' in name:
+ if 'load' in name: return f"{name} {_vreg(inst.vdst, w*2)}, {addr}{off2}{gds}"
+ if 'store' in name and 'xchg' not in name: return f"{name} {addr}, {d0}, {d1}{off2}{gds}"
+ return f"{name} {_vreg(inst.vdst, w*2)}, {addr}, {d0}, {d1}{off2}{gds}"
+ if 'load' in name: return f"{name} v{inst.vdst}{off}{gds}" if 'addtid' in name else f"{name} {dst}, {addr}{off}{gds}"
+ if 'store' in name and not _has(name, 'cmp', 'xchg'):
+ return f"{name} v{inst.data0}{off}{gds}" if 'addtid' in name else f"{name} {addr}, {d0}{off}{gds}"
+ if 'swizzle' in name or op == DSOp.DS_ORDERED_COUNT: return f"{name} v{inst.vdst}, {addr}{off}{gds}"
+ if 'permute' in name: return f"{name} v{inst.vdst}, {addr}, v{inst.data0}{off}{gds}"
+ if 'condxchg' in name: return f"{name} {_vreg(inst.vdst, 2)}, {addr}, {_vreg(inst.data0, 2)}{off}{gds}"
+ if _has(name, 'cmpstore', 'mskor', 'wrap'):
+ return f"{name} {dst}, {addr}, {d0}, {d1}{off}{gds}" if '_rtn' in name else f"{name} {addr}, {d0}, {d1}{off}{gds}"
+ return f"{name} {dst}, {addr}, {d0}{off}{gds}" if '_rtn' in name else f"{name} {addr}, {d0}{off}{gds}"

- # MUBUF/MTBUF helpers
- def _buf_vaddr(vaddr, offen, idxen): return _vreg(vaddr, 2) if offen and idxen else f"v{vaddr}" if offen or idxen else "off"
- def _buf_srsrc(srsrc): srsrc_base = srsrc * 4; return _reg("ttmp", srsrc_base - 108, 4) if 108 <= srsrc_base <= 123 else _sreg(srsrc_base, 4)

+def _disasm_vop3(inst: VOP3) -> str:
+ op = VOP3SDOp(inst.op) if inst.op in VOP3SD_OPS else VOP3Op(inst.op)
+ name = op.name.lower()

- # MUBUF: buffer load/store
- if cls_name == 'MUBUF':
- vdata, vaddr, srsrc, soffset = [unwrap(inst._values.get(f, 0)) for f in ('vdata', 'vaddr', 'srsrc', 'soffset')]
- offset, offen, idxen = unwrap(inst._values.get('offset', 0)), unwrap(inst._values.get('offen', 0)), unwrap(inst._values.get('idxen', 0))
- glc, dlc, slc, tfe = [unwrap(inst._values.get(f, 0)) for f in ('glc', 'dlc', 'slc', 'tfe')]
- if op_name in ('buffer_gl0_inv', 'buffer_gl1_inv'): return op_name
- # Determine data
width from op name - if 'd16' in op_name: width = 2 if any(x in op_name for x in ('xyz', 'xyzw')) else 1 - elif 'atomic' in op_name: - base_width = 2 if any(x in op_name for x in ('b64', 'u64', 'i64')) else 1 - width = base_width * 2 if 'cmpswap' in op_name else base_width - else: width = {'b32':1, 'b64':2, 'b96':3, 'b128':4, 'b16':1, 'x':1, 'xy':2, 'xyz':3, 'xyzw':4}.get(op_name.split('_')[-1], 1) - if tfe: width += 1 - mods = [m for m in ["offen" if offen else "", "idxen" if idxen else "", f"offset:{offset}" if offset else "", - "glc" if glc else "", "dlc" if dlc else "", "slc" if slc else "", "tfe" if tfe else ""] if m] - return f"{op_name} {_vreg(vdata, width)}, {_buf_vaddr(vaddr, offen, idxen)}, {_buf_srsrc(srsrc)}, {decode_src(soffset)}" + (" " + " ".join(mods) if mods else "") + # VOP3SD (shared encoding) + if inst.op in VOP3SD_OPS: + sdst = (inst.clmp << 7) | (inst.opsel << 3) | inst.abs + is64, mad64 = 'f64' in name, _has(name, 'mad_i64_i32', 'mad_u64_u32') + def src(v, neg, ext=False): s = _fmt_src(v, 2) if ext or is64 else inst.lit(v); return f"-{s}" if neg else s + s0, s1, s2 = src(inst.src0, inst.neg & 1), src(inst.src1, inst.neg & 2), src(inst.src2, inst.neg & 4, mad64) + dst = _vreg(inst.vdst, 2) if is64 or mad64 else f"v{inst.vdst}" + if op in (VOP3SDOp.V_ADD_CO_U32, VOP3SDOp.V_SUB_CO_U32, VOP3SDOp.V_SUBREV_CO_U32): return f"{name} {dst}, {_fmt_sdst(sdst, 1)}, {s0}, {s1}" + if op in (VOP3SDOp.V_ADD_CO_CI_U32, VOP3SDOp.V_SUB_CO_CI_U32, VOP3SDOp.V_SUBREV_CO_CI_U32): return f"{name} {dst}, {_fmt_sdst(sdst, 1)}, {s0}, {s1}, {s2}" + return f"{name} {dst}, {_fmt_sdst(sdst, 1)}, {s0}, {s1}, {s2}" + _omod(inst.omod) - # MTBUF: typed buffer load/store - if cls_name == 'MTBUF': - vdata, vaddr, srsrc, soffset = [unwrap(inst._values.get(f, 0)) for f in ('vdata', 'vaddr', 'srsrc', 'soffset')] - offset, tbuf_fmt, offen, idxen = [unwrap(inst._values.get(f, 0)) for f in ('offset', 'format', 'offen', 'idxen')] - glc, dlc, slc = [unwrap(inst._values.get(f, 0)) for f in ('glc', 'dlc', 'slc')] - mods = [f"format:{tbuf_fmt}"] + [m for m in ["idxen" if idxen else "", "offen" if offen else "", f"offset:{offset}" if offset else "", - "glc" if glc else "", "dlc" if dlc else "", "slc" if slc else ""] if m] - width = 2 if 'd16' in op_name and any(x in op_name for x in ('xyz', 'xyzw')) else 1 if 'd16' in op_name else {'x':1, 'xy':2, 'xyz':3, 'xyzw':4}.get(op_name.split('_')[-1], 1) - return f"{op_name} {_vreg(vdata, width)}, {_buf_vaddr(vaddr, offen, idxen)}, {_buf_srsrc(srsrc)}, {decode_src(soffset)} {' '.join(mods)}" + # Detect operand sizes + is64 = _is64(name) + is64_src, is64_dst = False, False + is16_d = is16_s = is16_s2 = False + if 'cvt_pk' in name: is16_s = name.endswith('16') + elif m := re.match(r'v_(?:cvt|frexp_exp)_([a-z0-9_]+)_([a-z0-9]+)', name): + is16_d, is16_s = _has(m.group(1), 'f16','i16','u16','b16'), _has(m.group(2), 'f16','i16','u16','b16') + is64_src, is64_dst = '64' in m.group(2), '64' in m.group(1) + is16_s2, is64 = is16_s, False + elif re.match(r'v_mad_[iu]32_[iu]16', name): is16_s = True + elif 'pack_b32' in name: is16_s = is16_s2 = True + else: is16_d = is16_s = is16_s2 = _is16(name) and not _has(name, 'dot2', 'pk_', 'sad', 'msad', 'qsad', 'mqsad') - # SOP1/SOP2/SOPC/SOPK - if cls_name in ('SOP1', 'SOP2', 'SOPC', 'SOPK'): - sizes = _parse_sop_sizes(op_name) - dst_cnt, src0_cnt = sizes[0], sizes[1] - src1_cnt = sizes[2] if len(sizes) > 2 else src0_cnt - if cls_name == 'SOP1': - sdst, ssrc0 = unwrap(inst._values.get('sdst', 0)), unwrap(inst._values.get('ssrc0', 0)) - if 
op_name == 's_getpc_b64': return f"{op_name} {_fmt_sdst(sdst, 2)}" - if op_name in ('s_setpc_b64', 's_rfe_b64'): return f"{op_name} {_fmt_ssrc(ssrc0, 2)}" - if op_name == 's_swappc_b64': return f"{op_name} {_fmt_sdst(sdst, 2)}, {_fmt_ssrc(ssrc0, 2)}" - if op_name in ('s_sendmsg_rtn_b32', 's_sendmsg_rtn_b64'): - return f"{op_name} {_fmt_sdst(sdst, 2 if 'b64' in op_name else 1)}, sendmsg({MSG_NAMES.get(ssrc0, str(ssrc0))})" - ssrc0_str = fmt_src(ssrc0) if src0_cnt == 1 else _fmt_ssrc(ssrc0, src0_cnt) - return f"{op_name} {_fmt_sdst(sdst, dst_cnt)}, {ssrc0_str}" - if cls_name == 'SOP2': - sdst, ssrc0, ssrc1 = [unwrap(inst._values.get(f, 0)) for f in ('sdst', 'ssrc0', 'ssrc1')] - ssrc0_str = fmt_src(ssrc0) if ssrc0 == 255 else _fmt_ssrc(ssrc0, src0_cnt) - ssrc1_str = fmt_src(ssrc1) if ssrc1 == 255 else _fmt_ssrc(ssrc1, src1_cnt) - return f"{op_name} {_fmt_sdst(sdst, dst_cnt)}, {ssrc0_str}, {ssrc1_str}" - if cls_name == 'SOPC': - return f"{op_name} {_fmt_ssrc(unwrap(inst._values.get('ssrc0', 0)), src0_cnt)}, {_fmt_ssrc(unwrap(inst._values.get('ssrc1', 0)), src1_cnt)}" - if cls_name == 'SOPK': - sdst, simm16 = unwrap(inst._values.get('sdst', 0)), unwrap(inst._values.get('simm16', 0)) - if op_name == 's_version': return f"{op_name} 0x{simm16:x}" - if op_name in ('s_setreg_b32', 's_getreg_b32'): - hwreg_id, hwreg_offset, hwreg_size = simm16 & 0x3f, (simm16 >> 6) & 0x1f, ((simm16 >> 11) & 0x1f) + 1 - hwreg_str = f"0x{simm16:x}" if hwreg_id in (16, 17) else f"hwreg({HWREG_NAMES.get(hwreg_id, str(hwreg_id))}, {hwreg_offset}, {hwreg_size})" - return f"{op_name} {hwreg_str}, {_fmt_sdst(sdst, 1)}" if op_name == 's_setreg_b32' else f"{op_name} {_fmt_sdst(sdst, 1)}, {hwreg_str}" - return f"{op_name} {_fmt_sdst(sdst, dst_cnt)}, 0x{simm16:x}" + # Source counts + shift64 = 'rev' in name and '64' in name and name.startswith('v_') + ldexp64 = op == VOP3Op.V_LDEXP_F64 + trig = op == VOP3Op.V_TRIG_PREOP_F64 + sad64, mqsad = _has(name, 'qsad_pk', 'mqsad_pk'), 'mqsad_u32' in name + s0n = 2 if ((is64 and not shift64) or sad64 or mqsad or is64_src) else 1 + s1n = 2 if (is64 and not _has(name, 'class') and not ldexp64 and not trig) else 1 + s2n = 4 if mqsad else 2 if (is64 or sad64) else 1 - # Generic fallback - def fmt_field(n, v): - v = unwrap(v) - if n in SRC_FIELDS: return fmt_src(v) if v != 255 else "0xff" - if n in ('sdst', 'vdst'): return f"{'s' if n == 'sdst' else 'v'}{v}" - return f"v{v}" if n == 'vsrc1' else f"0x{v:x}" if n == 'simm16' else str(v) - ops = [fmt_field(n, inst._values.get(n, 0)) for n in inst._fields if n not in ('encoding', 'op')] - return f"{op_name} {', '.join(ops)}" if ops else op_name + any_hi = inst.opsel != 0 + s0 = _vop3_src(inst, inst.src0, inst.neg&1, inst.abs&1, inst.opsel&1, s0n, is16_s, any_hi) + s1 = _vop3_src(inst, inst.src1, inst.neg&2, inst.abs&2, inst.opsel&2, s1n, is16_s, any_hi) + s2 = _vop3_src(inst, inst.src2, inst.neg&4, inst.abs&4, inst.opsel&4, s2n, is16_s2, any_hi) -# Assembler -SPECIAL_REGS = {'vcc_lo': RawImm(106), 'vcc_hi': RawImm(107), 'vcc': RawImm(106), 'null': RawImm(124), 'off': RawImm(124), 'm0': RawImm(125), - 'exec_lo': RawImm(126), 'exec_hi': RawImm(127), 'exec': RawImm(126), 'scc': RawImm(253), 'src_scc': RawImm(253)} -FLOAT_CONSTS = {'0.5': 0.5, '-0.5': -0.5, '1.0': 1.0, '-1.0': -1.0, '2.0': 2.0, '-2.0': -2.0, '4.0': 4.0, '-4.0': -4.0} + # Destination + dn = 4 if mqsad else 2 if (is64 or sad64 or is64_dst) else 1 + if op == VOP3Op.V_READLANE_B32: dst = _fmt_sdst(inst.vdst, 1) + elif dn > 1: dst = _vreg(inst.vdst, dn) + elif is16_d: dst = 
f"v{inst.vdst}.h" if (inst.opsel & 8) else f"v{inst.vdst}.l" if any_hi else f"v{inst.vdst}" + else: dst = f"v{inst.vdst}" + + cl, om = " clamp" if inst.clmp else "", _omod(inst.omod) + nonvgpr_opsel = (inst.src0 < 256 and (inst.opsel & 1)) or (inst.src1 < 256 and (inst.opsel & 2)) or (inst.src2 < 256 and (inst.opsel & 4)) + need_opsel = nonvgpr_opsel or (inst.opsel and not is16_s) + + if inst.op < 256: # VOPC + return f"{name}_e64 {s0}, {s1}" if name.startswith('v_cmpx') else f"{name}_e64 {_fmt_sdst(inst.vdst, 1)}, {s0}, {s1}" + if inst.op < 384: # VOP2 + os = _opsel_str(inst.opsel, 3, need_opsel, is16_d) if 'cndmask' in name else _opsel_str(inst.opsel, 2, need_opsel, is16_d) + return f"{name}_e64 {dst}, {s0}, {s1}, {s2}{os}{cl}{om}" if 'cndmask' in name else f"{name}_e64 {dst}, {s0}, {s1}{os}{cl}{om}" + if inst.op < 512: # VOP1 + return f"{name}_e64" if op in (VOP3Op.V_NOP, VOP3Op.V_PIPEFLUSH) else f"{name}_e64 {dst}, {s0}{_opsel_str(inst.opsel, 1, need_opsel, is16_d)}{cl}{om}" + # Native VOP3 + is3 = _has(name, 'fma', 'mad', 'min3', 'max3', 'med3', 'div_fix', 'div_fmas', 'sad', 'lerp', 'align', 'cube', 'bfe', 'bfi', + 'perm_b32', 'permlane', 'cndmask', 'xor3', 'or3', 'add3', 'lshl_or', 'and_or', 'lshl_add', 'add_lshl', 'xad', 'maxmin', 'minmax', 'dot2', 'cvt_pk_u8', 'mullit') + os = _opsel_str(inst.opsel, 3 if is3 else 2, need_opsel, is16_d) + return f"{name} {dst}, {s0}, {s1}, {s2}{os}{cl}{om}" if is3 else f"{name} {dst}, {s0}, {s1}{os}{cl}{om}" + +def _disasm_vop3sd(inst: VOP3SD) -> str: + op, name = VOP3SDOp(inst.op), VOP3SDOp(inst.op).name.lower() + is64, mad64 = 'f64' in name, _has(name, 'mad_i64_i32', 'mad_u64_u32') + def src(v, neg, ext=False): s = _fmt_src(v, 2) if ext or is64 else inst.lit(v); return f"-{s}" if neg else s + s0, s1, s2 = src(inst.src0, inst.neg & 1), src(inst.src1, inst.neg & 2), src(inst.src2, inst.neg & 4, mad64) + dst, is2src = _vreg(inst.vdst, 2) if is64 or mad64 else f"v{inst.vdst}", op in (VOP3SDOp.V_ADD_CO_U32, VOP3SDOp.V_SUB_CO_U32, VOP3SDOp.V_SUBREV_CO_U32) + suffix = "_e64" if name.startswith('v_') and 'co_' in name else "" + return f"{name}{suffix} {dst}, {_fmt_sdst(inst.sdst, 1)}, {s0}, {s1}{'' if is2src else f', {s2}'}{' clamp' if inst.clmp else ''}{_omod(inst.omod)}" + +def _disasm_vopd(inst: VOPD) -> str: + lit = inst._literal or inst.literal + vdst_y, nx, ny = (inst.vdsty << 1) | ((inst.vdstx & 1) ^ 1), VOPDOp(inst.opx).name.lower(), VOPDOp(inst.opy).name.lower() + def half(n, vd, s0, vs1): return f"{n} v{vd}, {inst.lit(s0)}{f', 0x{lit:x}' if lit and _has(n, 'fmaak', 'fmamk') else ''}" if 'mov' in n else f"{n} v{vd}, {inst.lit(s0)}, v{vs1}{f', 0x{lit:x}' if lit and _has(n, 'fmaak', 'fmamk') else ''}" + return f"{half(nx, inst.vdstx, inst.srcx0, inst.vsrcx1)} :: {half(ny, vdst_y, inst.srcy0, inst.vsrcy1)}" + +def _disasm_vop3p(inst: VOP3P) -> str: + name = VOP3POp(inst.op).name.lower() + is_wmma, is_3src, is_fma_mix = 'wmma' in name, _has(name, 'fma', 'mad', 'dot', 'wmma'), 'fma_mix' in name + if is_wmma: + sc = 2 if 'iu4' in name else 4 if 'iu8' in name else 8 + src0, src1, src2, dst = _fmt_src(inst.src0, sc), _fmt_src(inst.src1, sc), _fmt_src(inst.src2, 8), _vreg(inst.vdst, 8) + else: src0, src1, src2, dst = _fmt_src(inst.src0, 1), _fmt_src(inst.src1, 1), _fmt_src(inst.src2, 1), f"v{inst.vdst}" + n, opsel_hi = 3 if is_3src else 2, inst.opsel_hi | (inst.opsel_hi2 << 2) + if is_fma_mix: + def m(s, neg, abs_): return f"-{f'|{s}|' if abs_ else s}" if neg else (f"|{s}|" if abs_ else s) + src0, src1, src2 = m(src0, inst.neg & 1, inst.neg_hi & 1), 
m(src1, inst.neg & 2, inst.neg_hi & 2), m(src2, inst.neg & 4, inst.neg_hi & 4) + mods = ([_fmt_bits("op_sel", inst.opsel, n)] if inst.opsel else []) + ([_fmt_bits("op_sel_hi", opsel_hi, n)] if opsel_hi else []) + (["clamp"] if inst.clmp else []) + else: + mods = ([_fmt_bits("op_sel", inst.opsel, n)] if inst.opsel else []) + ([_fmt_bits("op_sel_hi", opsel_hi, n)] if opsel_hi != (7 if is_3src else 3) else []) + \ + ([_fmt_bits("neg_lo", inst.neg, n)] if inst.neg else []) + ([_fmt_bits("neg_hi", inst.neg_hi, n)] if inst.neg_hi else []) + (["clamp"] if inst.clmp else []) + return f"{name} {dst}, {src0}, {src1}, {src2}{' ' + ' '.join(mods) if mods else ''}" if is_3src else f"{name} {dst}, {src0}, {src1}{' ' + ' '.join(mods) if mods else ''}" + +def _disasm_buf(inst: MUBUF | MTBUF) -> str: + op = MTBUFOp(inst.op) if isinstance(inst, MTBUF) else MUBUFOp(inst.op) + name = op.name.lower() + if op in (MUBUFOp.BUFFER_GL0_INV, MUBUFOp.BUFFER_GL1_INV): return name + w = (2 if _has(name, 'xyz', 'xyzw') else 1) if 'd16' in name else \ + ((2 if _has(name, 'b64', 'u64', 'i64') else 1) * (2 if 'cmpswap' in name else 1)) if 'atomic' in name else \ + {'b32':1,'b64':2,'b96':3,'b128':4,'b16':1,'x':1,'xy':2,'xyz':3,'xyzw':4}.get(name.split('_')[-1], 1) + if inst.tfe: w += 1 + vaddr = _vreg(inst.vaddr, 2) if inst.offen and inst.idxen else f"v{inst.vaddr}" if inst.offen or inst.idxen else "off" + srsrc = _reg("ttmp", inst.srsrc*4 - 108, 4) if 108 <= inst.srsrc*4 <= 123 else _sreg(inst.srsrc*4, 4) + mods = ([f"format:{inst.format}"] if isinstance(inst, MTBUF) else []) + [m for c, m in [(inst.idxen,"idxen"),(inst.offen,"offen"),(inst.offset,f"offset:{inst.offset}"),(inst.glc,"glc"),(inst.dlc,"dlc"),(inst.slc,"slc"),(inst.tfe,"tfe")] if c] + return f"{name} {_vreg(inst.vdata, w)}, {vaddr}, {srsrc}, {decode_src(inst.soffset)}{' ' + ' '.join(mods) if mods else ''}" + +def _mimg_vaddr_width(name: str, dim: int, a16: bool) -> int: + """Calculate vaddr register count for MIMG sample/gather operations.""" + # 1d,2d,3d,cube,1d_arr,2d_arr,2d_msaa,2d_msaa_arr + base = [1, 2, 3, 3, 2, 3, 3, 4][dim] # address coords + grad = [1, 2, 3, 2, 1, 2, 2, 2][dim] # gradient coords (for derivatives) + if 'get_resinfo' in name: return 1 # only mip level + packed, unpacked = 0, 0 + if '_mip' in name: packed += 1 + elif 'sample' in name or 'gather' in name: + if '_o' in name: unpacked += 1 # offset + if re.search(r'_c(_|$)', name): unpacked += 1 # compare (not _cl) + if '_d' in name: unpacked += (grad + 1) & ~1 if '_g16' in name else grad*2 # derivatives + if '_b' in name: unpacked += 1 # bias + if '_l' in name and '_cl' not in name and '_lz' not in name: packed += 1 # LOD + if '_cl' in name: packed += 1 # clamp + return (base + packed + 1) // 2 + unpacked if a16 else base + packed + unpacked + +def _disasm_mimg(inst: MIMG) -> str: + name = MIMGOp(inst.op).name.lower() + srsrc_base = inst.srsrc * 4 + srsrc_str = _reg("ttmp", srsrc_base - 108, 8) if 108 <= srsrc_base <= 123 else _sreg(srsrc_base, 8) + # BVH intersect ray: special case with 4 SGPR srsrc + if 'bvh' in name: + vaddr = (9 if '64' in name else 8) if inst.a16 else (12 if '64' in name else 11) + srsrc = _reg("ttmp", srsrc_base - 108, 4) if 108 <= srsrc_base <= 123 else _sreg(srsrc_base, 4) + return f"{name} {_vreg(inst.vdata, 4)}, {_vreg(inst.vaddr, vaddr)}, {srsrc}{' a16' if inst.a16 else ''}" + # vdata width from dmask (gather4/msaa_load always 4), d16 packs, tfe adds 1 + vdata = 4 if 'gather4' in name or 'msaa_load' in name else (bin(inst.dmask).count('1') or 1) + if inst.d16: 
vdata = (vdata + 1) // 2 + if inst.tfe: vdata += 1 + # vaddr width + dim_names = ['1d', '2d', '3d', 'cube', '1d_array', '2d_array', '2d_msaa', '2d_msaa_array'] + dim = dim_names[inst.dim] if inst.dim < len(dim_names) else f"dim_{inst.dim}" + vaddr = _mimg_vaddr_width(name, inst.dim, inst.a16) + vaddr_str = f"v{inst.vaddr}" if vaddr == 1 else _vreg(inst.vaddr, vaddr) + # modifiers + mods = [f"dmask:0x{inst.dmask:x}"] if inst.dmask and (inst.dmask != 15 or 'atomic' in name) else [] + mods.append(f"dim:SQ_RSRC_IMG_{dim.upper()}") + for flag, mod in [(inst.unrm,"unorm"),(inst.glc,"glc"),(inst.slc,"slc"),(inst.dlc,"dlc"),(inst.r128,"r128"), + (inst.a16,"a16"),(inst.tfe,"tfe"),(inst.lwe,"lwe"),(inst.d16,"d16")]: + if flag: mods.append(mod) + # ssamp for sample/gather/get_lod + ssamp_str = "" + if 'sample' in name or 'gather' in name or 'get_lod' in name: + ssamp_base = inst.ssamp * 4 + ssamp_str = ", " + (_reg("ttmp", ssamp_base - 108, 4) if 108 <= ssamp_base <= 123 else _sreg(ssamp_base, 4)) + return f"{name} {_vreg(inst.vdata, vdata)}, {vaddr_str}, {srsrc_str}{ssamp_str} {' '.join(mods)}" + +def _sop_widths(name: str) -> tuple[int, int, int]: + """Return (dst_width, src0_width, src1_width) in register count for SOP instructions.""" + if name in ('s_bitset0_b64', 's_bitset1_b64', 's_bfm_b64'): return 2, 1, 1 + if name in ('s_lshl_b64', 's_lshr_b64', 's_ashr_i64', 's_bfe_u64', 's_bfe_i64'): return 2, 2, 1 + if name in ('s_bitcmp0_b64', 's_bitcmp1_b64'): return 1, 2, 1 + if m := re.search(r'_(b|i|u)(32|64)_(b|i|u)(32|64)$', name): return 2 if m.group(2) == '64' else 1, 2 if m.group(4) == '64' else 1, 1 + if m := re.search(r'_(b|i|u)(32|64)$', name): sz = 2 if m.group(2) == '64' else 1; return sz, sz, sz + return 1, 1, 1 + +def _disasm_sop1(inst: SOP1) -> str: + op, name = SOP1Op(inst.op), SOP1Op(inst.op).name.lower() + if op == SOP1Op.S_GETPC_B64: return f"{name} {_fmt_sdst(inst.sdst, 2)}" + if op in (SOP1Op.S_SETPC_B64, SOP1Op.S_RFE_B64): return f"{name} {_fmt_src(inst.ssrc0, 2)}" + if op == SOP1Op.S_SWAPPC_B64: return f"{name} {_fmt_sdst(inst.sdst, 2)}, {_fmt_src(inst.ssrc0, 2)}" + if op in (SOP1Op.S_SENDMSG_RTN_B32, SOP1Op.S_SENDMSG_RTN_B64): return f"{name} {_fmt_sdst(inst.sdst, 2 if 'b64' in name else 1)}, sendmsg({MSG.get(inst.ssrc0, str(inst.ssrc0))})" + dn, s0n, _ = _sop_widths(name) + return f"{name} {_fmt_sdst(inst.sdst, dn)}, {inst.lit(inst.ssrc0) if s0n == 1 else _fmt_src(inst.ssrc0, s0n)}" + +def _disasm_sop2(inst: SOP2) -> str: + name = SOP2Op(inst.op).name.lower() + dn, s0n, s1n = _sop_widths(name) + return f"{name} {_fmt_sdst(inst.sdst, dn)}, {inst.lit(inst.ssrc0) if inst.ssrc0 == 255 else _fmt_src(inst.ssrc0, s0n)}, {inst.lit(inst.ssrc1) if inst.ssrc1 == 255 else _fmt_src(inst.ssrc1, s1n)}" + +def _disasm_sopc(inst: SOPC) -> str: + name = SOPCOp(inst.op).name.lower() + _, s0n, s1n = _sop_widths(name) + return f"{name} {_fmt_src(inst.ssrc0, s0n)}, {_fmt_src(inst.ssrc1, s1n)}" + +def _disasm_sopk(inst: SOPK) -> str: + op, name = SOPKOp(inst.op), SOPKOp(inst.op).name.lower() + if op == SOPKOp.S_VERSION: return f"{name} 0x{inst.simm16:x}" + if op in (SOPKOp.S_SETREG_B32, SOPKOp.S_GETREG_B32): + hid, hoff, hsz = inst.simm16 & 0x3f, (inst.simm16 >> 6) & 0x1f, ((inst.simm16 >> 11) & 0x1f) + 1 + hs = f"0x{inst.simm16:x}" if hid in (16, 17) else f"hwreg({HWREG.get(hid, str(hid))}, {hoff}, {hsz})" + return f"{name} {hs}, {_fmt_sdst(inst.sdst, 1)}" if op == SOPKOp.S_SETREG_B32 else f"{name} {_fmt_sdst(inst.sdst, 1)}, {hs}" + dn, _, _ = _sop_widths(name) + return f"{name} 
{_fmt_sdst(inst.sdst, dn)}, 0x{inst.simm16:x}" + +def _disasm_vinterp(inst: VINTERP) -> str: + name = VINTERPOp(inst.op).name.lower() + src0 = f"-{inst.lit(inst.src0)}" if inst.neg & 1 else inst.lit(inst.src0) + src1 = f"-{inst.lit(inst.src1)}" if inst.neg & 2 else inst.lit(inst.src1) + src2 = f"-{inst.lit(inst.src2)}" if inst.neg & 4 else inst.lit(inst.src2) + mods = _mods((inst.waitexp, f"wait_exp:{inst.waitexp}"), (inst.clmp, "clamp")) + return f"{name} v{inst.vdst}, {src0}, {src1}, {src2}" + (" " + mods if mods else "") + +def _disasm_generic(inst: Inst) -> str: + name = f"op_{inst.op}" + def format_field(field_name, val): + val = unwrap(val) + if field_name in SRC_FIELDS: return inst.lit(val) if val != 255 else "0xff" + return f"{'s' if field_name == 'sdst' else 'v'}{val}" if field_name in ('sdst', 'vdst') else f"v{val}" if field_name == 'vsrc1' else f"0x{val:x}" if field_name == 'simm16' else str(val) + operands = [format_field(field_name, inst._values.get(field_name, 0)) for field_name in inst._fields if field_name not in ('encoding', 'op')] + return f"{name} {', '.join(operands)}" if operands else name + +DISASM_HANDLERS = {VOP1: _disasm_vop1, VOP2: _disasm_vop2, VOPC: _disasm_vopc, VOP3: _disasm_vop3, VOP3SD: _disasm_vop3sd, VOPD: _disasm_vopd, VOP3P: _disasm_vop3p, + VINTERP: _disasm_vinterp, SOPP: _disasm_sopp, SMEM: _disasm_smem, DS: _disasm_ds, FLAT: _disasm_flat, MUBUF: _disasm_buf, MTBUF: _disasm_buf, + MIMG: _disasm_mimg, SOP1: _disasm_sop1, SOP2: _disasm_sop2, SOPC: _disasm_sopc, SOPK: _disasm_sopk} + +def disasm(inst: Inst) -> str: return DISASM_HANDLERS.get(type(inst), _disasm_generic)(inst) + +# ═══════════════════════════════════════════════════════════════════════════════ +# ASSEMBLER +# ═══════════════════════════════════════════════════════════════════════════════ + +SPEC_REGS = {'vcc_lo': RawImm(106), 'vcc_hi': RawImm(107), 'vcc': RawImm(106), 'null': RawImm(124), 'off': RawImm(124), 'm0': RawImm(125), + 'exec_lo': RawImm(126), 'exec_hi': RawImm(127), 'exec': RawImm(126), 'scc': RawImm(253), 'src_scc': RawImm(253)} +FLOATS = {'0.5': 0.5, '-0.5': -0.5, '1.0': 1.0, '-1.0': -1.0, '2.0': 2.0, '-2.0': -2.0, '4.0': 4.0, '-4.0': -4.0} REG_MAP: dict[str, _RegFactory] = {'s': s, 'v': v, 't': ttmp, 'ttmp': ttmp} - -def parse_operand(op: str) -> tuple: - op = op.strip().lower() - neg = op.startswith('-') and not op[1:2].isdigit(); op = op[1:] if neg else op - abs_ = op.startswith('|') and op.endswith('|') or op.startswith('abs(') and op.endswith(')') - op = op[1:-1] if op.startswith('|') else op[4:-1] if op.startswith('abs(') else op - hi_half = op.endswith('.h') - op = re.sub(r'\.[lh]$', '', op) - if op in FLOAT_CONSTS: return (FLOAT_CONSTS[op], neg, abs_, hi_half) - if re.match(r'^-?\d+$', op): return (int(op), neg, abs_, hi_half) - if m := re.match(r'^-?0x([0-9a-f]+)$', op): - v = -int(m.group(1), 16) if op.startswith('-') else int(m.group(1), 16) - return (v, neg, abs_, hi_half) - if op in SPECIAL_REGS: return (SPECIAL_REGS[op], neg, abs_, hi_half) - if op == 'lit': return (RawImm(255), neg, abs_, hi_half) # literal marker (actual value comes from literal word) - if m := re.match(r'^([svt](?:tmp)?)\[(\d+):(\d+)\]$', op): return (REG_MAP[m.group(1)][int(m.group(2)):int(m.group(3))], neg, abs_, hi_half) - if m := re.match(r'^([svt](?:tmp)?)(\d+)$', op): - reg = REG_MAP[m.group(1)][int(m.group(2))] - reg.hi = hi_half - return (reg, neg, abs_, hi_half) - # hwreg(name, offset, size) or hwreg(name) -> simm16 encoding - if m := 
re.match(r'^hwreg\((\w+)(?:,\s*(\d+),\s*(\d+))?\)$', op): - name_str = m.group(1).lower() - hwreg_id = HWREG_IDS.get(name_str, int(name_str) if name_str.isdigit() else None) - if hwreg_id is None: raise ValueError(f"unknown hwreg name: {name_str}") - offset, size = int(m.group(2)) if m.group(2) else 0, int(m.group(3)) if m.group(3) else 32 - return (((size - 1) << 11) | (offset << 6) | hwreg_id, neg, abs_, hi_half) - raise ValueError(f"cannot parse operand: {op}") - SMEM_OPS = {'s_load_b32', 's_load_b64', 's_load_b128', 's_load_b256', 's_load_b512', 's_buffer_load_b32', 's_buffer_load_b64', 's_buffer_load_b128', 's_buffer_load_b256', 's_buffer_load_b512'} -SOP1_SRC_ONLY = {'s_setpc_b64', 's_rfe_b64'} -SOP1_MSG_IMM = {'s_sendmsg_rtn_b32', 's_sendmsg_rtn_b64'} -SOPK_IMM_ONLY = {'s_version'} -SOPK_IMM_FIRST = {'s_setreg_b32'} -SOPK_UNSUPPORTED = {'s_setreg_imm32_b32'} +SPEC_DSL = {'vcc_lo': 'VCC_LO', 'vcc_hi': 'VCC_HI', 'vcc': 'VCC_LO', 'null': 'NULL', 'off': 'OFF', 'm0': 'M0', + 'exec_lo': 'EXEC_LO', 'exec_hi': 'EXEC_HI', 'exec': 'EXEC_LO', 'scc': 'SCC', 'src_scc': 'SCC'} -def _operand_to_dsl(op: str) -> str: - """Transform a single operand from LLVM assembly syntax to DSL expression string.""" +def _op2dsl(op: str) -> str: op = op.strip() - # Handle negation prefix - neg = False - if op.startswith('-') and not (op[1:2].isdigit() or (len(op) > 2 and op[1] == '0' and op[2] in 'xX')): - neg, op = True, op[1:] - # Handle abs modifier: |x| or abs(x) - abs_ = False - if op.startswith('|') and op.endswith('|'): - abs_, op = True, op[1:-1] - elif op.startswith('abs(') and op.endswith(')'): - abs_, op = True, op[4:-1] - # Handle .h/.l suffix for 16-bit ops - hi_suffix = "" - if op.endswith('.h'): hi_suffix, op = ".h", op[:-2] - elif op.endswith('.l'): hi_suffix, op = ".l", op[:-2] - op_lower = op.lower() + neg = op.startswith('-') and not (op[1:2].isdigit() or (len(op) > 2 and op[1] == '0' and op[2] in 'xX')) + if neg: op = op[1:] + abs_ = (op.startswith('|') and op.endswith('|')) or (op.startswith('abs(') and op.endswith(')')) + if abs_: op = op[1:-1] if op.startswith('|') else op[4:-1] + hi = ".h" if op.endswith('.h') else ".l" if op.endswith('.l') else "" + if hi: op = op[:-2] + lo = op.lower() + def wrap(b): return f"{'-' if neg else ''}abs({b}){hi}" if abs_ else f"-{b}{hi}" if neg else f"{b}{hi}" + if lo in SPEC_DSL: return wrap(SPEC_DSL[lo]) + if op in FLOATS: return wrap(op) + rp = {'s': 's', 'v': 'v', 't': 'ttmp', 'ttmp': 'ttmp'} + if m := re.match(r'^([svt](?:tmp)?)\[(\d+):(\d+)\]$', lo): return wrap(f"{rp[m.group(1)]}[{m.group(2)}:{m.group(3)}]") + if m := re.match(r'^([svt](?:tmp)?)(\d+)$', lo): return wrap(f"{rp[m.group(1)]}[{m.group(2)}]") + if re.match(r'^-?\d+$|^-?0x[0-9a-fA-F]+$', op): return f"SrcMod({op}, neg={neg}, abs_={abs_})" if neg or abs_ else op + return wrap(op) - # Helper to apply modifiers - def apply_mods(base: str) -> str: - if not neg and not abs_: return f"{base}{hi_suffix}" - if abs_: return f"{'-' if neg else ''}abs({base}){hi_suffix}" - return f"-{base}{hi_suffix}" +def _parse_ops(s: str) -> list[str]: + ops, cur, depth, pipe = [], "", 0, False + for c in s: + if c in '[(': depth += 1 + elif c in '])': depth -= 1 + elif c == '|': pipe = not pipe + if c == ',' and depth == 0 and not pipe: ops.append(cur.strip()); cur = "" + else: cur += c + if cur.strip(): ops.append(cur.strip()) + return ops - # Special registers - vcc maps to VCC_LO (64-bit alias) - special_map = {'vcc_lo': 'VCC_LO', 'vcc_hi': 'VCC_HI', 'vcc': 'VCC_LO', 'null': 'NULL', 'off': 'OFF', - 'm0': 'M0', 
'exec_lo': 'EXEC_LO', 'exec_hi': 'EXEC_HI', 'exec': 'EXEC_LO', 'scc': 'SCC', - 'src_scc': 'SCC'} - if op_lower in special_map: return apply_mods(special_map[op_lower]) - # Float constants - float_map = {'0.5': '0.5', '-0.5': '-0.5', '1.0': '1.0', '-1.0': '-1.0', '2.0': '2.0', '-2.0': '-2.0', '4.0': '4.0', '-4.0': '-4.0'} - if op in float_map: return apply_mods(float_map[op]) - # Register range: v[0:3], s[4:7] - if m := re.match(r'^([svt](?:tmp)?)\[(\d+):(\d+)\]$', op_lower): - prefix = {'s': 's', 'v': 'v', 't': 'ttmp', 'ttmp': 'ttmp'}[m.group(1)] - return apply_mods(f"{prefix}[{m.group(2)}:{m.group(3)}]") - # Single register: v0, s1, ttmp5 - if m := re.match(r'^([svt](?:tmp)?)(\d+)$', op_lower): - prefix = {'s': 's', 'v': 'v', 't': 'ttmp', 'ttmp': 'ttmp'}[m.group(1)] - return apply_mods(f"{prefix}[{m.group(2)}]") - # Integer literals (decimal or hex) - use SrcMod wrapper when modifiers present - if re.match(r'^-?\d+$', op) or re.match(r'^-?0x([0-9a-fA-F]+)$', op): - if neg or abs_: - return f"SrcMod({op}, neg={neg}, abs_={abs_})" - return op - # hwreg(name, offset, size) -> pass through - if op_lower.startswith('hwreg('): return apply_mods(op) - # sendmsg(...) -> pass through - if op_lower.startswith('sendmsg('): return apply_mods(op) - # Fallback: return as-is - return apply_mods(op) - -def _parse_operands(op_str: str) -> list[str]: - """Parse comma-separated operands, respecting brackets and pipes.""" - operands, current, depth, in_pipe = [], "", 0, False - for ch in op_str: - if ch in '[(': depth += 1 - elif ch in '])': depth -= 1 - elif ch == '|': in_pipe = not in_pipe - if ch == ',' and depth == 0 and not in_pipe: - operands.append(current.strip()) - current = "" - else: - current += ch - if current.strip(): operands.append(current.strip()) - return operands - -def _unwrap_dsl(s: str) -> str: - """Unwrap a DSL expression to get the raw value for literals.""" - if re.match(r'^-?\d+$', s): return s - if re.match(r'^-?0x[0-9a-fA-F]+$', s): return s - return s +def _extract(text: str, pat: str, flags=re.I): + if m := re.search(pat, text, flags): return m, text[:m.start()] + text[m.end():] + return None, text def get_dsl(text: str) -> str: - """Transform LLVM-style assembly instruction to Python DSL expression string.""" - text = text.strip() - # Extract and remove trailing modifiers (must happen before operand parsing) - kwargs = [] - # Extract mul:N and div:N modifiers (omod) - omod_val = 0 - if m := re.search(r'\s+mul:2(?:\s|$)', text, re.I): - omod_val = 1; text = text[:m.start()] + text[m.end():] - elif m := re.search(r'\s+mul:4(?:\s|$)', text, re.I): - omod_val = 2; text = text[:m.start()] + text[m.end():] - elif m := re.search(r'\s+div:2(?:\s|$)', text, re.I): - omod_val = 3; text = text[:m.start()] + text[m.end():] - if omod_val: kwargs.append(f'omod={omod_val}') - # Extract clamp modifier - if m := re.search(r'\s+clamp(?:\s|$)', text, re.I): - kwargs.append('clmp=1') - text = text[:m.start()] + text[m.end():] - # Extract op_sel:[...] 
modifier - interpretation depends on format: - # VOP3: [src0, src1, dst] or [src0, src1, src2, dst] -> bits 0, 1, (2), 3 - # VOP3P/WMMA: [src0, src1, src2] -> bits 0, 1, 2 (no dst bit, 3-source ops) - opsel_explicit = None - if m := re.search(r'\s+op_sel:\[([^\]]+)\]', text, re.I): - bits = [int(x.strip()) for x in m.group(1).split(',')] - # Check if this is a VOP3P instruction (v_pk_*, v_wmma_*, v_dot*) - mnemonic = text.split()[0].lower() - is_vop3p = mnemonic.startswith(('v_pk_', 'v_wmma_', 'v_dot')) - if len(bits) == 3: - if is_vop3p: - # VOP3P: [src0, src1, src2] -> bits 0, 1, 2 - opsel_explicit = bits[0] | (bits[1] << 1) | (bits[2] << 2) - else: - # VOP3: [src0, src1, dst] -> bits 0, 1, 3 - opsel_explicit = bits[0] | (bits[1] << 1) | (bits[2] << 3) - else: - opsel_explicit = sum(b << i for i, b in enumerate(bits)) - text = text[:m.start()] + text[m.end():] - if m := re.search(r'\s+wait_exp:(\d+)', text, re.I): - kwargs.append(f'waitexp={m.group(1)}') - text = text[:m.start()] + text[m.end():] - # Extract offset:N for FLAT/GLOBAL/SCRATCH/SMEM (can be hex or decimal) - offset_val = None - if m := re.search(r'\s+offset:(0x[0-9a-fA-F]+|-?\d+)', text, re.I): - offset_val = m.group(1) - text = text[:m.start()] + text[m.end():] - # Extract dlc modifier (before glc to avoid partial match issues) - dlc_val = None - if m := re.search(r'\s+dlc(?:\s|$)', text, re.I): - dlc_val = 1 - text = text[:m.start()] + text[m.end():] - # Extract glc modifier - glc_val = None - if m := re.search(r'\s+glc(?:\s|$)', text, re.I): - glc_val = 1 - text = text[:m.start()] + text[m.end():] - # Extract neg_lo:[...] and neg_hi:[...] for VOP3P - neg_lo_val = None - if m := re.search(r'\s+neg_lo:\[([^\]]+)\]', text, re.I): - bits = [int(x.strip()) for x in m.group(1).split(',')] - neg_lo_val = sum(b << i for i, b in enumerate(bits)) - text = text[:m.start()] + text[m.end():] - neg_hi_val = None - if m := re.search(r'\s+neg_hi:\[([^\]]+)\]', text, re.I): - bits = [int(x.strip()) for x in m.group(1).split(',')] - neg_hi_val = sum(b << i for i, b in enumerate(bits)) - text = text[:m.start()] + text[m.end():] + text, kw = text.strip(), [] + # Extract modifiers + for pat, val in [(r'\s+mul:2(?:\s|$)', 1), (r'\s+mul:4(?:\s|$)', 2), (r'\s+div:2(?:\s|$)', 3)]: + if (m := _extract(text, pat))[0]: kw.append(f'omod={val}'); text = m[1]; break + if (m := _extract(text, r'\s+clamp(?:\s|$)'))[0]: kw.append('clmp=1'); text = m[1] + opsel, m, text = None, *_extract(text, r'\s+op_sel:\[([^\]]+)\]') + if m: + bits, mn = [int(x.strip()) for x in m.group(1).split(',')], text.split()[0].lower() + is3p = mn.startswith(('v_pk_', 'v_wmma_', 'v_dot')) + opsel = (bits[0] | (bits[1] << 1) | (bits[2] << 2)) if len(bits) == 3 and is3p else \ + (bits[0] | (bits[1] << 1) | (bits[2] << 3)) if len(bits) == 3 else sum(b << i for i, b in enumerate(bits)) + m, text = _extract(text, r'\s+wait_exp:(\d+)'); waitexp = m.group(1) if m else None + m, text = _extract(text, r'\s+offset:(0x[0-9a-fA-F]+|-?\d+)'); off_val = m.group(1) if m else None + m, text = _extract(text, r'\s+dlc(?:\s|$)'); dlc = 1 if m else None + m, text = _extract(text, r'\s+glc(?:\s|$)'); glc = 1 if m else None + m, text = _extract(text, r'\s+slc(?:\s|$)'); slc = 1 if m else None + m, text = _extract(text, r'\s+neg_lo:\[([^\]]+)\]'); neg_lo = sum(int(x.strip()) << i for i, x in enumerate(m.group(1).split(','))) if m else None + m, text = _extract(text, r'\s+neg_hi:\[([^\]]+)\]'); neg_hi = sum(int(x.strip()) << i for i, x in enumerate(m.group(1).split(','))) if m else None + if waitexp: 
kw.append(f'waitexp={waitexp}') + parts = text.replace(',', ' ').split() if not parts: raise ValueError("empty instruction") - mnemonic, op_str = parts[0].lower(), text[len(parts[0]):].strip() - # Handle s_waitcnt specially - if mnemonic == 's_waitcnt': - vmcnt, expcnt, lgkmcnt = 0x3f, 0x7, 0x3f - for part in op_str.replace(',', ' ').split(): - if m := re.match(r'vmcnt\((\d+)\)', part): vmcnt = int(m.group(1)) - elif m := re.match(r'expcnt\((\d+)\)', part): expcnt = int(m.group(1)) - elif m := re.match(r'lgkmcnt\((\d+)\)', part): lgkmcnt = int(m.group(1)) - elif re.match(r'^0x[0-9a-f]+$|^\d+$', part): return f"s_waitcnt(simm16={int(part, 0)})" - wc = waitcnt(vmcnt, expcnt, lgkmcnt) - return f"s_waitcnt(simm16={wc})" - # Handle VOPD dual-issue: opx dst, src :: opy dst, src + mn, op_str = parts[0].lower(), text[len(parts[0]):].strip() + ops, args = _parse_ops(op_str), [_op2dsl(o) for o in _parse_ops(op_str)] + + # s_waitcnt + if mn == 's_waitcnt': + vm, exp, lgkm = 0x3f, 0x7, 0x3f + for p in op_str.replace(',', ' ').split(): + if m := re.match(r'vmcnt\((\d+)\)', p): vm = int(m.group(1)) + elif m := re.match(r'expcnt\((\d+)\)', p): exp = int(m.group(1)) + elif m := re.match(r'lgkmcnt\((\d+)\)', p): lgkm = int(m.group(1)) + elif re.match(r'^0x[0-9a-f]+$|^\d+$', p): return f"s_waitcnt(simm16={int(p, 0)})" + return f"s_waitcnt(simm16={waitcnt(vm, exp, lgkm)})" + + # VOPD if '::' in text: - x_part, y_part = text.split('::') - x_parts, y_parts = x_part.strip().replace(',', ' ').split(), y_part.strip().replace(',', ' ').split() - opx_name, opy_name = x_parts[0].upper(), y_parts[0].upper() - x_ops = [_operand_to_dsl(p) for p in x_parts[1:]] - y_ops = [_operand_to_dsl(p) for p in y_parts[1:]] - vdstx, srcx0 = x_ops[0], x_ops[1] if len(x_ops) > 1 else '0' - vsrcx1 = x_ops[2] if len(x_ops) > 2 else 'v[0]' - vdsty, srcy0 = y_ops[0], y_ops[1] if len(y_ops) > 1 else '0' - vsrcy1 = y_ops[2] if len(y_ops) > 2 else 'v[0]' - lit = None - if 'fmaak' in opx_name.lower() and len(x_ops) > 3: lit = x_ops[3] - elif 'fmamk' in opx_name.lower() and len(x_ops) > 3: lit, vsrcx1 = x_ops[2], x_ops[3] - elif 'fmaak' in opy_name.lower() and len(y_ops) > 3: lit = y_ops[3] - elif 'fmamk' in opy_name.lower() and len(y_ops) > 3: lit, vsrcy1 = y_ops[2], y_ops[3] - lit_str = f", literal={lit}" if lit else "" - return f"VOPD(VOPDOp.{opx_name}, VOPDOp.{opy_name}, vdstx={vdstx}, vdsty={vdsty}, srcx0={srcx0}, vsrcx1={vsrcx1}, srcy0={srcy0}, vsrcy1={vsrcy1}{lit_str})" - operands = _parse_operands(op_str) - dsl_args = [_operand_to_dsl(op) for op in operands] - # Handle special instructions - if mnemonic in SOPK_UNSUPPORTED: raise ValueError(f"unsupported instruction: {mnemonic}") - if mnemonic in SOP1_SRC_ONLY: return f"{mnemonic}(ssrc0={dsl_args[0]})" - if mnemonic in SOP1_MSG_IMM: return f"{mnemonic}(sdst={dsl_args[0]}, ssrc0=RawImm({_unwrap_dsl(dsl_args[1])}))" - if mnemonic in SOPK_IMM_ONLY: return f"{mnemonic}(simm16={dsl_args[0]})" - if mnemonic in SOPK_IMM_FIRST: return f"{mnemonic}(simm16={dsl_args[0]}, sdst={dsl_args[1]})" - # SMEM with immediate offset (offset in operand[2] or offset: modifier) - if mnemonic in SMEM_OPS: - glc_str = ", glc=1" if glc_val else "" - dlc_str = ", dlc=1" if dlc_val else "" - # Pure immediate offset in operand[2] - if len(operands) >= 3 and re.match(r'^-?[0-9]|^-?0x', operands[2].strip().lower()): - return f"{mnemonic}(sdata={dsl_args[0]}, sbase={dsl_args[1]}, offset={dsl_args[2]}, soffset=RawImm(124){glc_str}{dlc_str})" - # Register soffset with offset: modifier - if offset_val and len(operands) 
>= 3: - return f"{mnemonic}(sdata={dsl_args[0]}, sbase={dsl_args[1]}, offset={offset_val}, soffset={dsl_args[2]}{glc_str}{dlc_str})" - # Register soffset only (no offset modifier) - if len(operands) >= 3: - return f"{mnemonic}(sdata={dsl_args[0]}, sbase={dsl_args[1]}, soffset={dsl_args[2]}{glc_str}{dlc_str})" - # Buffer ops with 'off' - if mnemonic.startswith('buffer_') and len(operands) >= 2 and operands[1].strip().lower() == 'off': - soff = f"RawImm({_unwrap_dsl(dsl_args[3])})" if len(dsl_args) > 3 else "RawImm(0)" - return f"{mnemonic}(vdata={dsl_args[0]}, vaddr=0, srsrc={dsl_args[2]}, soffset={soff})" - # FLAT/GLOBAL/SCRATCH load - if (mnemonic.startswith('flat_load') or mnemonic.startswith('global_load') or mnemonic.startswith('scratch_load')) and len(dsl_args) >= 3: - off = f", offset={offset_val}" if offset_val else "" - return f"{mnemonic}(vdst={dsl_args[0]}, addr={dsl_args[1]}, saddr={dsl_args[2]}{off})" - # FLAT/GLOBAL/SCRATCH store - if (mnemonic.startswith('flat_store') or mnemonic.startswith('global_store') or mnemonic.startswith('scratch_store')) and len(dsl_args) >= 3: - off = f", offset={offset_val}" if offset_val else "" - return f"{mnemonic}(addr={dsl_args[0]}, data={dsl_args[1]}, saddr={dsl_args[2]}{off})" - # Handle v_fmaak/v_fmamk literals - lit_str = "" - if mnemonic in ('v_fmaak_f32', 'v_fmaak_f16') and len(dsl_args) == 4: - lit_str, dsl_args = f", literal={_unwrap_dsl(dsl_args[3])}", dsl_args[:3] - elif mnemonic in ('v_fmamk_f32', 'v_fmamk_f16') and len(dsl_args) == 4: - lit_str, dsl_args = f", literal={_unwrap_dsl(dsl_args[2])}", [dsl_args[0], dsl_args[1], dsl_args[3]] - # Handle v_add_co_ci_u32_e32 etc with vcc operands - strip implicit vcc sdst and carry_in, add _e32 suffix + xp, yp = text.split('::') + xps, yps = xp.strip().replace(',', ' ').split(), yp.strip().replace(',', ' ').split() + xo, yo = [_op2dsl(p) for p in xps[1:]], [_op2dsl(p) for p in yps[1:]] + vdx, sx0, vsx1 = xo[0], xo[1] if len(xo) > 1 else '0', xo[2] if len(xo) > 2 else 'v[0]' + vdy, sy0, vsy1 = yo[0], yo[1] if len(yo) > 1 else '0', yo[2] if len(yo) > 2 else 'v[0]' + lit = xo[3] if 'fmaak' in xps[0].lower() and len(xo) > 3 else yo[3] if 'fmaak' in yps[0].lower() and len(yo) > 3 else None + if 'fmamk' in xps[0].lower() and len(xo) > 3: lit, vsx1 = xo[2], xo[3] + elif 'fmamk' in yps[0].lower() and len(yo) > 3: lit, vsy1 = yo[2], yo[3] + return f"VOPD(VOPDOp.{xps[0].upper()}, VOPDOp.{yps[0].upper()}, vdstx={vdx}, vdsty={vdy}, srcx0={sx0}, vsrcx1={vsx1}, srcy0={sy0}, vsrcy1={vsy1}{f', literal={lit}' if lit else ''})" + + # Special instructions + if mn == 's_setreg_imm32_b32': raise ValueError(f"unsupported: {mn}") + if mn in ('s_setpc_b64', 's_rfe_b64'): return f"{mn}(ssrc0={args[0]})" + if mn in ('s_sendmsg_rtn_b32', 's_sendmsg_rtn_b64'): return f"{mn}(sdst={args[0]}, ssrc0=RawImm({args[1].strip()}))" + if mn == 's_version': return f"{mn}(simm16={args[0]})" + if mn == 's_setreg_b32': return f"{mn}(simm16={args[0]}, sdst={args[1]})" + + # SMEM + if mn in SMEM_OPS: + gs, ds = ", glc=1" if glc else "", ", dlc=1" if dlc else "" + if len(ops) >= 3 and re.match(r'^-?[0-9]|^-?0x', ops[2].strip().lower()): + return f"{mn}(sdata={args[0]}, sbase={args[1]}, offset={args[2]}, soffset=RawImm(124){gs}{ds})" + if off_val and len(ops) >= 3: return f"{mn}(sdata={args[0]}, sbase={args[1]}, offset={off_val}, soffset={args[2]}{gs}{ds})" + if len(ops) >= 3: return f"{mn}(sdata={args[0]}, sbase={args[1]}, soffset={args[2]}{gs}{ds})" + + # Buffer + if mn.startswith('buffer_') and len(ops) >= 2 and 
+ # Buffer
+ if mn.startswith('buffer_') and len(ops) >= 2 and ops[1].strip().lower() == 'off':
+ return f"{mn}(vdata={args[0]}, vaddr=0, srsrc={args[2]}, soffset={f'RawImm({args[3].strip()})' if len(args) > 3 else 'RawImm(0)'})"
+
+ # FLAT/GLOBAL/SCRATCH load/store/atomic - saddr needs RawImm(124) for off/null
+ def _saddr(a): return 'RawImm(124)' if a in ('OFF', 'NULL') else a
+ flat_mods = f"{f', offset={off_val}' if off_val else ''}{', glc=1' if glc else ''}{', slc=1' if slc else ''}{', dlc=1' if dlc else ''}"
+ for pre, flds in [('flat_load','vdst,addr,saddr'), ('global_load','vdst,addr,saddr'), ('scratch_load','vdst,addr,saddr'),
+ ('flat_store','addr,data,saddr'), ('global_store','addr,data,saddr'), ('scratch_store','addr,data,saddr')]:
+ if mn.startswith(pre) and len(args) >= 2:
+ f0, f1, f2 = flds.split(',')
+ return f"{mn}({f0}={args[0]}, {f1}={args[1]}{f', {f2}={_saddr(args[2])}' if len(args) >= 3 else ', saddr=RawImm(124)'}{flat_mods})"
+ for pre in ('flat_atomic', 'global_atomic', 'scratch_atomic'):
+ if mn.startswith(pre):
+ if glc and len(args) >= 3: return f"{mn}(vdst={args[0]}, addr={args[1]}, data={args[2]}{f', saddr={_saddr(args[3])}' if len(args) >= 4 else ', saddr=RawImm(124)'}{flat_mods})"
+ if len(args) >= 2: return f"{mn}(addr={args[0]}, data={args[1]}{f', saddr={_saddr(args[2])}' if len(args) >= 3 else ', saddr=RawImm(124)'}{flat_mods})"
+
+ # DS instructions
+ if mn.startswith('ds_'):
+ off0, off1 = (str(int(off_val, 0) & 0xff), str((int(off_val, 0) >> 8) & 0xff)) if off_val else ("0", "0")
+ gds_s = ", gds=1" if 'gds' in text.lower().split()[-1:] else ""
+ off_kw = f", offset0={off0}, offset1={off1}{gds_s}"
+ if mn == 'ds_nop' or mn in ('ds_gws_sema_v', 'ds_gws_sema_p', 'ds_gws_sema_release_all'): return f"{mn}({off_kw.lstrip(', ')})"
+ if 'gws_' in mn: return f"{mn}(addr={args[0]}{off_kw})"
+ if 'consume' in mn or 'append' in mn: return f"{mn}(vdst={args[0]}{off_kw})"
+ if 'gs_reg' in mn: return f"{mn}(vdst={args[0]}, data0={args[1]}{off_kw})"
+ if '2addr' in mn:
+ if 'load' in mn: return f"{mn}(vdst={args[0]}, addr={args[1]}{off_kw})"
+ if 'store' in mn and 'xchg' not in mn: return f"{mn}(addr={args[0]}, data0={args[1]}, data1={args[2]}{off_kw})"
+ return f"{mn}(vdst={args[0]}, addr={args[1]}, data0={args[2]}, data1={args[3]}{off_kw})"
+ if 'load' in mn: return f"{mn}(vdst={args[0]}{off_kw})" if 'addtid' in mn else f"{mn}(vdst={args[0]}, addr={args[1]}{off_kw})"
+ if 'store' in mn and not _has(mn, 'cmp', 'xchg'):
+ return f"{mn}(data0={args[0]}{off_kw})" if 'addtid' in mn else f"{mn}(addr={args[0]}, data0={args[1]}{off_kw})"
+ if 'swizzle' in mn or 'ordered_count' in mn: return f"{mn}(vdst={args[0]}, addr={args[1]}{off_kw})"
+ if 'permute' in mn: return f"{mn}(vdst={args[0]}, addr={args[1]}, data0={args[2]}{off_kw})"
+ if 'bvh' in mn: return f"{mn}(vdst={args[0]}, addr={args[1]}, data0={args[2]}, data1={args[3]}{off_kw})"
+ if 'condxchg' in mn: return f"{mn}(vdst={args[0]}, addr={args[1]}, data0={args[2]}{off_kw})"
+ if _has(mn, 'cmpstore', 'mskor', 'wrap'):
+ return f"{mn}(vdst={args[0]}, addr={args[1]}, data0={args[2]}, data1={args[3]}{off_kw})" if '_rtn' in mn else f"{mn}(addr={args[0]}, data0={args[1]}, data1={args[2]}{off_kw})"
+ return f"{mn}(vdst={args[0]}, addr={args[1]}, data0={args[2]}{off_kw})" if '_rtn' in mn else f"{mn}(addr={args[0]}, data0={args[1]}{off_kw})"
+
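DS ops take a single 16-bit offset: in text, but the encoding splits it into two 8-bit fields; the *2addr forms then read offset0/offset1 as two independent element offsets. The split in isolation:

def split_ds_offset(off: str) -> tuple[int, int]:
  v = int(off, 0)                    # accepts "8" or "0x1234", like the code above
  return v & 0xff, (v >> 8) & 0xff   # (offset0, offset1)

assert split_ds_offset("0x1234") == (0x34, 0x12)
assert split_ds_offset("8") == (8, 0)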
+ # v_fmaak/v_fmamk literal extraction
+ lit_s = ""
+ if mn in ('v_fmaak_f32', 'v_fmaak_f16') and len(args) == 4: lit_s, args = f", literal={args[3].strip()}", args[:3]
+ elif mn in ('v_fmamk_f32', 'v_fmamk_f16') and len(args) == 4: lit_s, args = f", literal={args[2].strip()}", [args[0], args[1], args[3]]
+
+ # VCC ops cleanup
 vcc_ops = {'v_add_co_ci_u32', 'v_sub_co_ci_u32', 'v_subrev_co_ci_u32'}
- if mnemonic.replace('_e32', '') in vcc_ops and len(dsl_args) >= 5:
- mnemonic = mnemonic.replace('_e32', '') + '_e32' # Ensure _e32 suffix for VOP2 encoding
- dsl_args = [dsl_args[0], dsl_args[2], dsl_args[3]]
- # Handle v_add_co_ci_u32_e64 etc - strip _e64 suffix (function name doesn't have it, returns VOP3SD)
- if mnemonic.replace('_e64', '') in vcc_ops and mnemonic.endswith('_e64'):
- mnemonic = mnemonic.replace('_e64', '')
- # v_cmp_*_e32: strip implicit vcc_lo dest
- if mnemonic.startswith('v_cmp') and not mnemonic.endswith('_e64') and len(dsl_args) >= 3 and operands[0].strip().lower() in ('vcc_lo', 'vcc_hi', 'vcc'):
- dsl_args = dsl_args[1:]
- # CMPX with _e64: prepend implicit EXEC_LO (vdst=126)
- if 'cmpx' in mnemonic and mnemonic.endswith('_e64') and len(dsl_args) == 2:
- dsl_args = ['RawImm(126)'] + dsl_args
- # Build the function name - use mnemonic as-is, replacing . with _
- func_name = mnemonic.replace('.', '_')
- # When explicit opsel is given, strip .h/.l from register args (opsel overrides)
- if opsel_explicit is not None:
- dsl_args = [re.sub(r'\.[hl]$', '', a) for a in dsl_args]
- args_str = ', '.join(dsl_args)
- all_kwargs = list(kwargs)
- if lit_str: all_kwargs.append(lit_str.lstrip(', '))
- if opsel_explicit is not None: all_kwargs.append(f'opsel={opsel_explicit}')
- if neg_lo_val is not None: all_kwargs.append(f'neg={neg_lo_val}')
- if neg_hi_val is not None: all_kwargs.append(f'neg_hi={neg_hi_val}')
- kwargs_str = ', '.join(all_kwargs)
- if kwargs_str:
- return f"{func_name}({args_str}, {kwargs_str})" if args_str else f"{func_name}({kwargs_str})"
- return f"{func_name}({args_str})"
+ if mn.replace('_e32', '') in vcc_ops and len(args) >= 5: mn, args = mn.replace('_e32', '') + '_e32', [args[0], args[2], args[3]]
+ if mn.replace('_e64', '') in vcc_ops and mn.endswith('_e64'): mn = mn.replace('_e64', '')
+ if mn.startswith('v_cmp') and not mn.endswith('_e64') and len(args) >= 3 and ops[0].strip().lower() in ('vcc_lo', 'vcc_hi', 'vcc'): args = args[1:]
+ if 'cmpx' in mn and mn.endswith('_e64') and len(args) == 2: args = ['RawImm(126)'] + args
+
+ fn = mn.replace('.', '_')
+ if opsel is not None: args = [re.sub(r'\.[hl]$', '', a) for a in args]
+
+ # v_fma_mix*: extract inline neg/abs modifiers
+ if 'fma_mix' in mn and neg_lo is None and neg_hi is None:
+ inline_neg, inline_abs, clean_args = 0, 0, [args[0]]
+ for i, op in enumerate(ops[1:4]):
+ op = op.strip()
+ neg = op.startswith('-') and not (op[1:2].isdigit() or (len(op) > 2 and op[1] == '0' and op[2] in 'xX'))
+ if neg: op = op[1:]
+ abs_ = op.startswith('|') and op.endswith('|')
+ if abs_: op = op[1:-1]
+ if neg: inline_neg |= (1 << i)
+ if abs_: inline_abs |= (1 << i)
+ clean_args.append(_op2dsl(op))
+ args = clean_args + args[4:]
+ if inline_neg: neg_lo = inline_neg
+ if inline_abs: neg_hi = inline_abs
+
+ all_kw = list(kw)
+ if lit_s: all_kw.append(lit_s.lstrip(', '))
+ if opsel is not None: all_kw.append(f'opsel={opsel}')
+ if neg_lo is not None: all_kw.append(f'neg={neg_lo}')
+ if neg_hi is not None: all_kw.append(f'neg_hi={neg_hi}')
+ if 'bvh' in mn and 'intersect_ray' in mn: all_kw.extend(['dmask=15', 'unrm=1', 'r128=1'])
+
+ a_str, kw_str = ', '.join(args), ', '.join(all_kw)
+ return f"{fn}({a_str}, {kw_str})" if kw_str and a_str else f"{fn}({kw_str})" if kw_str else f"{fn}({a_str})"
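For v_fma_mix* a leading '-' and surrounding '|...|' on each source set one bit per operand in the neg and neg_hi (abs) masks; the digit test keeps a numeric literal like -2.0 from being read as a modifier. A simplified standalone version of that scan (it omits the hex-literal check the real code does):

def mix_mods(srcs: list[str]) -> tuple[int, int]:
  neg_mask, abs_mask = 0, 0
  for i, op in enumerate(srcs):
    op = op.strip()
    if op.startswith('-') and not op[1:2].isdigit(): neg_mask |= 1 << i; op = op[1:]
    if op.startswith('|') and op.endswith('|'): abs_mask |= 1 << i
  return neg_mask, abs_mask

assert mix_mods(['v1', '-v2', '|v3|']) == (0b010, 0b100)
assert mix_mods(['-2.0', 'v2', 'v3']) == (0, 0)   # numeric literal, not a neg modifier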
 def asm(text: str) -> Inst:
- """Assemble LLVM-style instruction text to Inst by transforming to DSL and eval."""
- from extra.assembly.amd.autogen import rdna3 as autogen
- dsl_expr = get_dsl(text)
- namespace = {name: getattr(autogen, name) for name in dir(autogen) if not name.startswith('_')}
- namespace.update({'s': s, 'v': v, 'ttmp': ttmp, 'abs': abs, 'RawImm': RawImm, 'SrcMod': SrcMod, 'VGPR': VGPR, 'SGPR': SGPR, 'TTMP': TTMP,
- 'VCC_LO': VCC_LO, 'VCC_HI': VCC_HI, 'VCC': VCC, 'EXEC_LO': EXEC_LO, 'EXEC_HI': EXEC_HI, 'EXEC': EXEC,
- 'SCC': SCC, 'M0': M0, 'NULL': NULL, 'OFF': OFF})
- try:
- return eval(dsl_expr, namespace)
+ from extra.assembly.amd.autogen import rdna3 as ag
+ dsl = get_dsl(text)
+ ns = {n: getattr(ag, n) for n in dir(ag) if not n.startswith('_')}
+ ns.update({'s': s, 'v': v, 'ttmp': ttmp, 'abs': abs, 'RawImm': RawImm, 'SrcMod': SrcMod, 'VGPR': VGPR, 'SGPR': SGPR, 'TTMP': TTMP,
+ 'VCC_LO': VCC_LO, 'VCC_HI': VCC_HI, 'VCC': VCC, 'EXEC_LO': EXEC_LO, 'EXEC_HI': EXEC_HI, 'EXEC': EXEC, 'SCC': SCC, 'M0': M0, 'NULL': NULL, 'OFF': OFF})
+ try: return eval(dsl, ns)
 except NameError:
- # Try with _e32 suffix for VOP1/VOP2/VOPC (only for v_* instructions)
- if m := re.match(r'^(v_\w+)(\(.*\))$', dsl_expr):
- return eval(f"{m.group(1)}_e32{m.group(2)}", namespace)
+ if m := re.match(r'^(v_\w+)(\(.*\))$', dsl): return eval(f"{m.group(1)}_e32{m.group(2)}", ns)
 raise
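The assembler stays small by reusing Python itself: get_dsl() rewrites LLVM-style text into a DSL call string, asm() evals it against the autogen namespace, and a NameError triggers one retry with the _e32 suffix for VOP1/VOP2/VOPC names. Typical use (output shapes are indicative, not verified here):

from extra.assembly.amd.asm import asm, disasm, get_dsl
print(get_dsl("v_add_nc_u32 v1, v2, v3"))   # the intermediate DSL string
inst = asm("v_add_nc_u32 v1, v2, v3")       # eval'd; retried as v_add_nc_u32_e32 on NameError
print(disasm(inst))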
diff --git a/extra/assembly/amd/autogen/cdna/__init__.py b/extra/assembly/amd/autogen/cdna/__init__.py
index c1c1ecaaad..416e4b9ac8 100644
--- a/extra/assembly/amd/autogen/cdna/__init__.py
+++ b/extra/assembly/amd/autogen/cdna/__init__.py
@@ -2209,33 +2209,33 @@ buffer_atomic_xor_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_XOR_X2)
 buffer_atomic_inc_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_INC_X2)
 buffer_atomic_dec_x2 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_DEC_X2)
 cdna4 = functools.partial(MUBUF, MUBUFOp.CDNA4)
-scratch_load_ubyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE, seg=2)
-scratch_load_sbyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE, seg=2)
-scratch_load_ushort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_USHORT, seg=2)
-scratch_load_sshort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SSHORT, seg=2)
-scratch_load_dword = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORD, seg=2)
-scratch_load_dwordx2 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORDX2, seg=2)
-scratch_load_dwordx3 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORDX3, seg=2)
-scratch_load_dwordx4 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORDX4, seg=2)
-scratch_store_byte = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_BYTE, seg=2)
-scratch_store_byte_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_BYTE_D16_HI, seg=2)
-scratch_store_short = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_SHORT, seg=2)
-scratch_store_short_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_SHORT_D16_HI, seg=2)
-scratch_store_dword = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORD, seg=2)
-scratch_store_dwordx2 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORDX2, seg=2)
-scratch_store_dwordx3 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORDX3, seg=2)
-scratch_store_dwordx4 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORDX4, seg=2)
-scratch_load_ubyte_d16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE_D16, seg=2)
-scratch_load_ubyte_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE_D16_HI, seg=2)
-scratch_load_sbyte_d16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE_D16, seg=2)
-scratch_load_sbyte_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE_D16_HI, seg=2)
-scratch_load_short_d16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SHORT_D16, seg=2)
-scratch_load_short_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SHORT_D16_HI, seg=2)
-scratch_load_lds_ubyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_UBYTE, seg=2)
-scratch_load_lds_sbyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_SBYTE, seg=2)
-scratch_load_lds_ushort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_USHORT, seg=2)
-scratch_load_lds_sshort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_SSHORT, seg=2)
-scratch_load_lds_dword = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_DWORD, seg=2)
+scratch_load_ubyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE, seg=1)
+scratch_load_sbyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE, seg=1)
+scratch_load_ushort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_USHORT, seg=1)
+scratch_load_sshort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SSHORT, seg=1)
+scratch_load_dword = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORD, seg=1)
+scratch_load_dwordx2 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORDX2, seg=1)
+scratch_load_dwordx3 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORDX3, seg=1)
+scratch_load_dwordx4 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_DWORDX4, seg=1)
+scratch_store_byte = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_BYTE, seg=1)
+scratch_store_byte_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_BYTE_D16_HI, seg=1)
+scratch_store_short = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_SHORT, seg=1)
+scratch_store_short_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_SHORT_D16_HI, seg=1)
+scratch_store_dword = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORD, seg=1)
+scratch_store_dwordx2 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORDX2, seg=1)
+scratch_store_dwordx3 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORDX3, seg=1)
+scratch_store_dwordx4 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_DWORDX4, seg=1)
+scratch_load_ubyte_d16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE_D16, seg=1)
+scratch_load_ubyte_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_UBYTE_D16_HI, seg=1)
+scratch_load_sbyte_d16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE_D16, seg=1)
+scratch_load_sbyte_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SBYTE_D16_HI, seg=1)
+scratch_load_short_d16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SHORT_D16, seg=1)
+scratch_load_short_d16_hi = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_SHORT_D16_HI, seg=1)
+scratch_load_lds_ubyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_UBYTE, seg=1)
+scratch_load_lds_sbyte = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_SBYTE, seg=1)
+scratch_load_lds_ushort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_USHORT, seg=1)
+scratch_load_lds_sshort = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_SSHORT, seg=1)
+scratch_load_lds_dword = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_LDS_DWORD, seg=1)
 s_load_dword = functools.partial(SMEM, SMEMOp.S_LOAD_DWORD)
 s_load_dwordx2 = functools.partial(SMEM, SMEMOp.S_LOAD_DWORDX2)
 s_load_dwordx4 = functools.partial(SMEM, SMEMOp.S_LOAD_DWORDX4)
diff --git a/extra/assembly/amd/autogen/rdna3/__init__.py b/extra/assembly/amd/autogen/rdna3/__init__.py
index 65c8c928cd..a1d5780754 100644
--- a/extra/assembly/amd/autogen/rdna3/__init__.py
+++ b/extra/assembly/amd/autogen/rdna3/__init__.py
@@ -56,6 +56,12 @@ class DSOp(IntEnum):
 DS_MAX_F32 = 19
 DS_NOP = 20
 DS_ADD_F32 = 21
+ DS_GWS_SEMA_RELEASE_ALL = 24
+ DS_GWS_INIT = 25
+ DS_GWS_SEMA_V = 26
+ DS_GWS_SEMA_BR = 27
+ DS_GWS_SEMA_P = 28
+ DS_GWS_BARRIER = 29
 DS_STORE_B8 = 30
 DS_STORE_B16 = 31
 DS_ADD_RTN_U32 = 32
@@ -178,10 +184,13 @@ class FLATOp(IntEnum):
 FLAT_LOAD_D16_HI_B16 = 35
 FLAT_STORE_D16_HI_B8 = 36
 FLAT_STORE_D16_HI_B16 = 37
+ GLOBAL_LOAD_ADDTID_B32 = 40
+ GLOBAL_STORE_ADDTID_B32 = 41
 FLAT_ATOMIC_SWAP_B32 = 51
 FLAT_ATOMIC_CMPSWAP_B32 = 52
 FLAT_ATOMIC_ADD_U32 = 53
 FLAT_ATOMIC_SUB_U32 = 54
+ FLAT_ATOMIC_CSUB_U32 = 55
 FLAT_ATOMIC_MIN_I32 = 56
 FLAT_ATOMIC_MIN_U32 = 57
 FLAT_ATOMIC_MAX_I32 = 58
@@ -717,6 +726,7 @@ class SOPPOp(IntEnum):
 S_SET_INST_PREFETCH_DISTANCE = 4
 S_CLAUSE = 5
 S_DELAY_ALU = 7
+ S_WAITCNT_DEPCTR = 8
 S_WAITCNT = 9
 S_WAIT_IDLE = 10
 S_WAIT_EVENT = 11
@@ -1848,6 +1858,12 @@ ds_min_f32 = functools.partial(DS, DSOp.DS_MIN_F32)
 ds_max_f32 = functools.partial(DS, DSOp.DS_MAX_F32)
 ds_nop = functools.partial(DS, DSOp.DS_NOP)
 ds_add_f32 = functools.partial(DS, DSOp.DS_ADD_F32)
+ds_gws_sema_release_all = functools.partial(DS, DSOp.DS_GWS_SEMA_RELEASE_ALL)
+ds_gws_init = functools.partial(DS, DSOp.DS_GWS_INIT)
+ds_gws_sema_v = functools.partial(DS, DSOp.DS_GWS_SEMA_V)
+ds_gws_sema_br = functools.partial(DS, DSOp.DS_GWS_SEMA_BR)
+ds_gws_sema_p = functools.partial(DS, DSOp.DS_GWS_SEMA_P)
+ds_gws_barrier = functools.partial(DS, DSOp.DS_GWS_BARRIER)
 ds_store_b8 = functools.partial(DS, DSOp.DS_STORE_B8)
 ds_store_b16 = functools.partial(DS, DSOp.DS_STORE_B16)
 ds_add_rtn_u32 = functools.partial(DS, DSOp.DS_ADD_RTN_U32)
@@ -1968,10 +1984,13 @@ flat_load_d16_hi_i8 = functools.partial(FLAT, FLATOp.FLAT_LOAD_D16_HI_I8)
 flat_load_d16_hi_b16 = functools.partial(FLAT, FLATOp.FLAT_LOAD_D16_HI_B16)
 flat_store_d16_hi_b8 = functools.partial(FLAT, FLATOp.FLAT_STORE_D16_HI_B8)
 flat_store_d16_hi_b16 = functools.partial(FLAT, FLATOp.FLAT_STORE_D16_HI_B16)
+global_load_addtid_b32 = functools.partial(FLAT, FLATOp.GLOBAL_LOAD_ADDTID_B32)
+global_store_addtid_b32 = functools.partial(FLAT, FLATOp.GLOBAL_STORE_ADDTID_B32)
 flat_atomic_swap_b32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_SWAP_B32)
 flat_atomic_cmpswap_b32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_CMPSWAP_B32)
 flat_atomic_add_u32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_ADD_U32)
 flat_atomic_sub_u32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_SUB_U32)
+flat_atomic_csub_u32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_CSUB_U32)
 flat_atomic_min_i32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_MIN_I32)
 flat_atomic_min_u32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_MIN_U32)
 flat_atomic_max_i32 = functools.partial(FLAT, FLATOp.FLAT_ATOMIC_MAX_I32)
@@ -2226,28 +2245,28 @@ buffer_atomic_cmpswap_f32 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_CMPSW
 buffer_atomic_min_f32 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_MIN_F32)
 buffer_atomic_max_f32 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_MAX_F32)
 buffer_atomic_add_f32 = functools.partial(MUBUF, MUBUFOp.BUFFER_ATOMIC_ADD_F32)
-scratch_load_u8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_U8, seg=2)
-scratch_load_i8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_I8, seg=2)
-scratch_load_u16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_U16, seg=2)
-scratch_load_i16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_I16, seg=2)
-scratch_load_b32 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B32, seg=2)
-scratch_load_b64 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B64, seg=2)
-scratch_load_b96 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B96, seg=2)
-scratch_load_b128 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B128, seg=2)
-scratch_store_b8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B8, seg=2)
-scratch_store_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B16, seg=2)
-scratch_store_b32 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B32, seg=2)
-scratch_store_b64 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B64, seg=2)
-scratch_store_b96 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B96, seg=2)
-scratch_store_b128 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B128, seg=2)
-scratch_load_d16_u8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_U8, seg=2)
-scratch_load_d16_i8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_I8, seg=2)
-scratch_load_d16_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_B16, seg=2)
-scratch_load_d16_hi_u8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_HI_U8, seg=2)
-scratch_load_d16_hi_i8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_HI_I8, seg=2)
-scratch_load_d16_hi_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_HI_B16, seg=2)
-scratch_store_d16_hi_b8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_D16_HI_B8, seg=2)
-scratch_store_d16_hi_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_D16_HI_B16, seg=2)
+scratch_load_u8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_U8, seg=1)
+scratch_load_i8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_I8, seg=1)
+scratch_load_u16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_U16, seg=1)
+scratch_load_i16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_I16, seg=1)
+scratch_load_b32 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B32, seg=1)
+scratch_load_b64 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B64, seg=1)
+scratch_load_b96 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B96, seg=1)
+scratch_load_b128 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_B128, seg=1)
+scratch_store_b8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B8, seg=1)
+scratch_store_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B16, seg=1)
+scratch_store_b32 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B32, seg=1)
+scratch_store_b64 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B64, seg=1)
+scratch_store_b96 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B96, seg=1)
+scratch_store_b128 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_B128, seg=1)
+scratch_load_d16_u8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_U8, seg=1)
+scratch_load_d16_i8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_I8, seg=1)
+scratch_load_d16_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_B16, seg=1)
+scratch_load_d16_hi_u8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_HI_U8, seg=1)
+scratch_load_d16_hi_i8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_HI_I8, seg=1)
+scratch_load_d16_hi_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_LOAD_D16_HI_B16, seg=1)
+scratch_store_d16_hi_b8 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_D16_HI_B8, seg=1)
+scratch_store_d16_hi_b16 = functools.partial(FLAT, SCRATCHOp.SCRATCH_STORE_D16_HI_B16, seg=1)
 s_load_b32 = functools.partial(SMEM, SMEMOp.S_LOAD_B32)
 s_load_b64 = functools.partial(SMEM, SMEMOp.S_LOAD_B64)
 s_load_b128 = functools.partial(SMEM, SMEMOp.S_LOAD_B128)
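Both autogen files fix the same field: in the FLAT encoding, seg selects the aperture, and per the ISA documents the mapping is 0=FLAT, 1=SCRATCH, 2=GLOBAL, so the scratch_* helpers had been aliasing the global ops. A sketch of a check (assumes v and RawImm are importable from the dsl module as asm.py uses them, and relies on the __getattr__ field access added in dsl.py below):

from extra.assembly.amd.autogen.rdna3 import scratch_load_b32, global_load_b32
from extra.assembly.amd.dsl import RawImm, v
a = scratch_load_b32(vdst=v[0], addr=v[1], saddr=RawImm(124))
b = global_load_b32(vdst=v[0], addr=v[1], saddr=RawImm(124))
assert a.seg == 1 and b.seg == 2
assert a.sve == 1 and b.sve == 0   # sve is set only for scratch VGPR addressing, see dsl.py below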
@@ -2485,6 +2504,7 @@ s_sleep = functools.partial(SOPP, SOPPOp.S_SLEEP)
 s_set_inst_prefetch_distance = functools.partial(SOPP, SOPPOp.S_SET_INST_PREFETCH_DISTANCE)
 s_clause = functools.partial(SOPP, SOPPOp.S_CLAUSE)
 s_delay_alu = functools.partial(SOPP, SOPPOp.S_DELAY_ALU)
+s_waitcnt_depctr = functools.partial(SOPP, SOPPOp.S_WAITCNT_DEPCTR)
 s_waitcnt = functools.partial(SOPP, SOPPOp.S_WAITCNT)
 s_wait_idle = functools.partial(SOPP, SOPPOp.S_WAIT_IDLE)
 s_wait_event = functools.partial(SOPP, SOPPOp.S_WAIT_EVENT)
diff --git a/extra/assembly/amd/dsl.py b/extra/assembly/amd/dsl.py
index ef2f3aefdc..d4ac98f431 100644
--- a/extra/assembly/amd/dsl.py
+++ b/extra/assembly/amd/dsl.py
@@ -6,22 +6,21 @@ from typing import overload, Annotated, TypeVar, Generic
 # Bit field DSL
 class BitField:
- def __init__(self, hi: int, lo: int, name: str | None = None): self.hi, self.lo, self.name = hi, lo, name
- def __set_name__(self, owner, name): self.name, self._owner = name, owner
+ def __init__(self, hi: int, lo: int, name: str | None = None): self.hi, self.lo, self.name, self._marker = hi, lo, name, None
+ def __set_name__(self, owner, name):
+ import typing
+ self.name, self._owner = name, owner
+ # Cache marker at class definition time
+ hints = typing.get_type_hints(owner, include_extras=True)
+ if name in hints:
+ hint = hints[name]
+ if typing.get_origin(hint) is Annotated:
+ args = typing.get_args(hint)
+ self._marker = args[1] if len(args) > 1 else None
 def __eq__(self, val: int) -> tuple[BitField, int]: return (self, val) # type: ignore
 def mask(self) -> int: return (1 << (self.hi - self.lo + 1)) - 1
 @property
- def marker(self) -> type | None:
- # Get marker from Annotated type hint if present
- import typing
- if hasattr(self, '_owner') and self.name:
- hints = typing.get_type_hints(self._owner, include_extras=True)
- if self.name in hints:
- hint = hints[self.name]
- if typing.get_origin(hint) is Annotated:
- args = typing.get_args(hint)
- return args[1] if len(args) > 1 else None
- return None
+ def marker(self) -> type | None: return self._marker
 @overload
 def __get__(self, obj: None, objtype: type) -> BitField: ...
 @overload
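__set_name__ runs once per attribute when the owning class is created, so resolving the Annotated marker there replaces a typing.get_type_hints() call on every .marker access with a cached lookup. The pattern in isolation (names here are illustrative, not from the repo):

import typing
from typing import Annotated

class Marker: pass

class Field:
  def __init__(self, hi: int, lo: int): self.hi, self.lo, self._marker = hi, lo, None
  def __set_name__(self, owner, name):
    self.name = name
    hints = typing.get_type_hints(owner, include_extras=True)  # resolved once, at class creation
    if name in hints and typing.get_origin(hints[name]) is Annotated:
      args = typing.get_args(hints[name])
      self._marker = args[1] if len(args) > 1 else None
  @property
  def marker(self): return self._marker

class Demo:
  op: Annotated[int, Marker] = Field(7, 0)

assert Demo.op.marker is Marker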
@@ -179,6 +178,21 @@ class Inst:
 raise ValueError(f"SOP1 {op_val.name} expects {expected} destination register(s), got {sdst_val.count}")
 if isinstance(ssrc0_val, Reg) and ssrc0_val.count != expected:
 raise ValueError(f"SOP1 {op_val.name} expects {expected} source register(s), got {ssrc0_val.count}")
+ # FLAT: set sve=1 when addr is a VGPR for scratch only
+ # For scratch (seg=1), sve=1 means addr VGPR is used; sve=0 means addr is "off"
+ # For global (seg=2) and flat (seg=0), sve is always 0
+ if self.__class__.__name__ == 'FLAT' and 'sve' in self._fields:
+ seg_val = self._values.get('seg', 0)
+ if isinstance(seg_val, RawImm): seg_val = seg_val.val
+ addr_val = orig_args.get('addr')
+ if seg_val == 1 and isinstance(addr_val, VGPR): self._values['sve'] = 1
+ # VOP3P: v_fma_mix* instructions (opcodes 32-34) have opsel_hi default of 0, not 7
+ if self.__class__.__name__ == 'VOP3P':
+ op_val = orig_args.get(field_names[0]) if args else orig_args.get('op')
+ if hasattr(op_val, 'value'): op_val = op_val.value
+ if op_val in (32, 33, 34) and 'opsel_hi' not in orig_args and 'opsel_hi2' not in orig_args:
+ self._values['opsel_hi'] = 0
+ self._values['opsel_hi2'] = 0
 # Type check and encode values
 for name, val in list(self._values.items()):
 if name == 'encoding': continue
@@ -340,6 +354,14 @@ class Inst:
 lit = f", literal={hex(self._literal)}" if self._literal is not None else ""
 return f"{self.__class__.__name__}({', '.join(f'{k}={v}' for k, v in items)}{lit})"
+ def __getattr__(self, name: str):
+ if name.startswith('_'): raise AttributeError(name)
+ return unwrap(self._values.get(name, 0))
+
+ def lit(self, v: int) -> str:
+ from extra.assembly.amd.asm import decode_src
+ return f"0x{self._literal:x}" if v == 255 and self._literal else decode_src(v)
+
 def __eq__(self, other):
 if not isinstance(other, Inst): return NotImplemented
 return self.__class__ == other.__class__ and self._values == other._values and self._literal == other._literal
@@ -519,10 +541,24 @@ def _parse_single_pdf(url: str) -> dict:
 break
 formats[fmt_name] = fields
- # fix known PDF errors
+ # fix known PDF errors - assert if already present (so we know when the bug is fixed)
 if 'SMEM' in formats:
 formats['SMEM'] = [(n, 13 if n == 'DLC' else 14 if n == 'GLC' else h, 13 if n == 'DLC' else 14 if n == 'GLC' else l, e, t)
 for n, h, l, e, t in formats['SMEM']]
+ # add missing opcodes not in PDF tables (RDNA3/RDNA3.5 specific)
+ if doc_name in ('RDNA3', 'RDNA3.5'):
+ if 'SOPPOp' in enums:
+ assert 8 not in enums['SOPPOp'], "S_WAITCNT_DEPCTR now in PDF, remove workaround"
+ enums['SOPPOp'][8] = 'S_WAITCNT_DEPCTR'
+ if 'DSOp' in enums:
+ gws_ops = {24: 'DS_GWS_SEMA_RELEASE_ALL', 25: 'DS_GWS_INIT', 26: 'DS_GWS_SEMA_V',
+ 27: 'DS_GWS_SEMA_BR', 28: 'DS_GWS_SEMA_P', 29: 'DS_GWS_BARRIER'}
+ for k in gws_ops: assert k not in enums['DSOp'], f"{gws_ops[k]} now in PDF, remove workaround"
+ enums['DSOp'].update(gws_ops)
+ if 'FLATOp' in enums:
+ flat_ops = {40: 'GLOBAL_LOAD_ADDTID_B32', 41: 'GLOBAL_STORE_ADDTID_B32', 55: 'FLAT_ATOMIC_CSUB_U32'}
+ for k in flat_ops: assert k not in enums['FLATOp'], f"{flat_ops[k]} now in PDF, remove workaround"
+ enums['FLATOp'].update(flat_ops)
 return {"formats": formats, "enums": enums, "src_enum": src_enum, "doc_name": doc_name, "is_cdna": is_cdna}
@@ -608,7 +644,7 @@ def generate(output_path: str | None = None, arch: str = "rdna3") -> dict:
 for cls_name, ops in sorted(enums.items()):
 fmt = cls_name[:-2]
 for op_val, name in sorted(ops.items()):
- seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=2"}.get(fmt, "")
+ seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=1"}.get(fmt, "")
 tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt, f"{fmt}, {cls_name}")
 if fmt in formats or fmt in ("GLOBAL", "SCRATCH"):
 if fmt in ("VOP1", "VOP2", "VOPC"):
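The PDF workarounds above are written to expire on their own: each injected opcode is guarded by an assert that fires as soon as AMD's PDF starts listing it, so the patch can never silently shadow an official table entry. The pattern reduced to its core:

def patch_enum(table: dict[int, str], fixes: dict[int, str]) -> None:
  for k, name in fixes.items():
    # if this fires, the PDF gained the opcode and the workaround must be deleted
    assert k not in table, f"{name} now in PDF, remove workaround"
  table.update(fixes)

sopp = {9: 'S_WAITCNT'}
patch_enum(sopp, {8: 'S_WAITCNT_DEPCTR'})
assert sopp[8] == 'S_WAITCNT_DEPCTR'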
seg=2"}.get(fmt, "") + seg = {"GLOBAL": ", seg=2", "SCRATCH": ", seg=1"}.get(fmt, "") tgt = {"GLOBAL": "FLAT, GLOBALOp", "SCRATCH": "FLAT, SCRATCHOp"}.get(fmt, f"{fmt}, {cls_name}") if fmt in formats or fmt in ("GLOBAL", "SCRATCH"): if fmt in ("VOP1", "VOP2", "VOPC"): diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index c99720aceb..18d50f6a25 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -3,6 +3,7 @@ from __future__ import annotations import ctypes, os from extra.assembly.amd.dsl import Inst, RawImm +from extra.assembly.amd.asm import detect_format from extra.assembly.amd.pcode import _f32, _i32, _sext, _f16, _i16, _f64, _i64 from extra.assembly.amd.autogen.rdna3.gen_pcode import get_compiled_functions from extra.assembly.amd.autogen.rdna3 import ( @@ -146,21 +147,7 @@ class WaveState: for reg, val in self._pend_sgpr.items(): self.sgpr[reg] = val self._pend_sgpr.clear() -# Instruction decode -def decode_format(word: int) -> tuple[type[Inst] | None, bool]: - hi2 = (word >> 30) & 0x3 - if hi2 == 0b11: - enc = (word >> 26) & 0xf - if enc == 0b1101: return SMEM, True - if enc == 0b0101: - op = (word >> 16) & 0x3ff - return (VOP3SD, True) if op in (288, 289, 290, 764, 765, 766, 767, 768, 769, 770) else (VOP3, True) - return {0b0011: (VOP3P, True), 0b0110: (DS, True), 0b0111: (FLAT, True), 0b0010: (VOPD, True)}.get(enc, (None, True)) - if hi2 == 0b10: - enc = (word >> 23) & 0x7f - return {0b1111101: (SOP1, False), 0b1111110: (SOPC, False), 0b1111111: (SOPP, False)}.get(enc, (SOPK, False) if ((word >> 28) & 0xf) == 0b1011 else (SOP2, False)) - enc = (word >> 25) & 0x7f - return (VOPC, False) if enc == 0b0111110 else (VOP1, False) if enc == 0b0111111 else (VOP2, False) + def _unwrap(v) -> int: return v.val if isinstance(v, RawImm) else v.value if hasattr(v, 'value') else v @@ -168,10 +155,10 @@ def decode_program(data: bytes) -> Program: result: Program = {} i = 0 while i < len(data): - word = int.from_bytes(data[i:i+4], 'little') - inst_class, is_64 = decode_format(word) + try: inst_class = detect_format(data[i:]) + except ValueError: break # stop at invalid instruction (padding/metadata after code) if inst_class is None: i += 4; continue - base_size = 8 if is_64 else 4 + base_size = inst_class._size() # Pass enough data for potential 64-bit literal (base + 8 bytes max) inst = inst_class.from_bytes(data[i:i+base_size+8]) for name, val in inst._values.items(): setattr(inst, name, _unwrap(val)) diff --git a/extra/assembly/amd/test/test_llvm.py b/extra/assembly/amd/test/test_llvm.py index 97b1cab756..955b3239a2 100644 --- a/extra/assembly/amd/test/test_llvm.py +++ b/extra/assembly/amd/test/test_llvm.py @@ -65,12 +65,18 @@ def parse_llvm_tests(text: str) -> list[tuple[str, bytes]]: if not asm_text: continue for j in range(i, min(i + 3, len(lines))): # Match GFX11, W32, or W64 encodings (all valid for gfx11) + # Format 1: "// GFX11: v_foo ... 
diff --git a/extra/assembly/amd/test/test_llvm.py b/extra/assembly/amd/test/test_llvm.py
index 97b1cab756..955b3239a2 100644
--- a/extra/assembly/amd/test/test_llvm.py
+++ b/extra/assembly/amd/test/test_llvm.py
@@ -65,12 +65,18 @@ def parse_llvm_tests(text: str) -> list[tuple[str, bytes]]:
 if not asm_text: continue
 for j in range(i, min(i + 3, len(lines))):
 # Match GFX11, W32, or W64 encodings (all valid for gfx11)
+ # Format 1: "// GFX11: v_foo ... ; encoding: [0x01,0x02,...]"
+ # Format 2: "// GFX11: [0x01,0x02,...]" (used by DS, older files)
 if m := re.search(r'(?:GFX11|W32|W64)[^:]*:.*?encoding:\s*\[(.*?)\]', lines[j]):
 hex_bytes = m.group(1).replace('0x', '').replace(',', '').replace(' ', '')
- if hex_bytes:
- try: tests.append((asm_text, bytes.fromhex(hex_bytes)))
- except ValueError: pass
- break
+ elif m := re.search(r'(?:GFX11|W32|W64)[^:]*:\s*\[(0x[0-9a-fA-F,x\s]+)\]', lines[j]):
+ hex_bytes = m.group(1).replace('0x', '').replace(',', '').replace(' ', '')
+ else:
+ continue
+ if hex_bytes:
+ try: tests.append((asm_text, bytes.fromhex(hex_bytes)))
+ except ValueError: pass
+ break
 return tests
 def try_assemble(text: str):
diff --git a/extra/assembly/amd/test/test_roundtrip.py b/extra/assembly/amd/test/test_roundtrip.py
index bf9b68d869..d2660ab140 100644
--- a/extra/assembly/amd/test/test_roundtrip.py
+++ b/extra/assembly/amd/test/test_roundtrip.py
@@ -4,51 +4,9 @@ import unittest, io, sys, re, subprocess, os
 from extra.assembly.amd.autogen.rdna3 import *
 from extra.assembly.amd.dsl import Inst
 from extra.assembly.amd.asm import asm
+from extra.assembly.amd.asm import detect_format
 from extra.assembly.amd.test.helpers import get_llvm_mc, get_llvm_objdump
-# Instruction format detection based on encoding bits
-def detect_format(data: bytes) -> type[Inst] | None:
- """Detect instruction format from machine code bytes."""
- if len(data) < 4: return None
- word = int.from_bytes(data[:4], 'little')
- enc_9bit = (word >> 23) & 0x1FF # 9-bit encoding for SOP1/SOPC/SOPP
- enc_8bit = (word >> 24) & 0xFF
-
- # Check 9-bit encodings first (most specific)
- if enc_9bit == 0x17D: return SOP1 # bits 31:23 = 101111101
- if enc_9bit == 0x17E: return SOPC # bits 31:23 = 101111110
- if enc_9bit == 0x17F: return SOPP # bits 31:23 = 101111111
- # SOPK: bits 31:28 = 1011, bits 27:23 = opcode (check after SOP1/SOPC/SOPP)
- if enc_8bit in range(0xB0, 0xC0): return SOPK
- # SOP2: bits 31:23 in range 0x100-0x17C (0x80-0xBE in bits 31:24, but not SOPK)
- if 0x80 <= enc_8bit <= 0x9F: return SOP2
- # VOP1: bits 31:25 = 0111111 (0x3F)
- if (word >> 25) == 0x3F: return VOP1
- # VOPC: bits 31:25 = 0111110 (0x3E)
- if (word >> 25) == 0x3E: return VOPC
- # VOP2: bits 31:30 = 00
- if (word >> 30) == 0: return VOP2
-
- # Check 64-bit formats
- if len(data) >= 8:
- if enc_8bit in (0xD4, 0xD5, 0xD7):
- # VOP3 and VOP3SD share encoding - check opcode to determine which
- # VOP3SD opcodes: 288-290 (v_*_co_ci_*), 764-770 (v_div_scale_*, v_mad_*, v_*_co_u32)
- op = (int.from_bytes(data[:8], 'little') >> 16) & 0x3FF
- if op in {288, 289, 290, 764, 765, 766, 767, 768, 769, 770}: return VOP3SD
- return VOP3
- if enc_8bit == 0xD6: return VOP3SD
- if enc_8bit == 0xCC: return VOP3P
- if enc_8bit == 0xCD: return VINTERP
- if enc_8bit in (0xC8, 0xC9): return VOPD
- if enc_8bit == 0xF4: return SMEM
- if enc_8bit == 0xD8: return DS
- if enc_8bit in (0xDC, 0xDD, 0xDE, 0xDF): return FLAT
- if enc_8bit in (0xE0, 0xE1, 0xE2, 0xE3): return MUBUF
- if enc_8bit in (0xE8, 0xE9, 0xEA, 0xEB): return MTBUF
-
- return None
-
 def disassemble_lib(lib: bytes, compiler) -> list[tuple[str, bytes]]:
 """Disassemble ELF binary and return list of (instruction_text, machine_code_bytes)."""
 old_stdout = sys.stdout
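The two comment shapes now accepted, shown against sample lines; the regexes mirror the ones added above, and the byte strings are only illustrative:

import re
samples = ['// GFX11: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]',  # Format 1
           '// GFX11: [0x00,0x00,0xd8,0xd8,0x01,0x00,0x00,0x00]']               # Format 2 (DS-style)
for line in samples:
  if m := re.search(r'(?:GFX11|W32|W64)[^:]*:.*?encoding:\s*\[(.*?)\]', line): hx = m.group(1)
  elif m := re.search(r'(?:GFX11|W32|W64)[^:]*:\s*\[(0x[0-9a-fA-F,x\s]+)\]', line): hx = m.group(1)
  else: continue
  print(bytes.fromhex(hx.replace('0x', '').replace(',', '').replace(' ', '')))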