From a9a7b3340407beb3d6cb9e0e697b11711f94b9b9 Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 31 Dec 2025 12:56:59 -0500 Subject: [PATCH 01/25] IGNORE_OOB=0 in CI (#13903) --- .github/workflows/test.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 46230bf80b..7fe8d47100 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -5,6 +5,7 @@ env: CAPTURE_PROCESS_REPLAY: 1 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} PYTHONPATH: ${{ github.workspace }} + IGNORE_OOB: 0 on: push: @@ -36,6 +37,8 @@ jobs: name: Docs runs-on: ubuntu-22.04 timeout-minutes: 10 + env: + IGNORE_OOB: 1 steps: - name: Checkout Code uses: actions/checkout@v4 @@ -307,7 +310,7 @@ jobs: deps: testing_unit python-version: '3.14' - name: Test SPEC=2 - run: IGNORE_OOB=0 SPEC=2 pytest --maxfail=10 -n auto --durations=30 --ignore=test/models --ignore test/test_custom_kernel.py --ignore test/unit/test_hashing.py --timeout 60 -k "not test_setitem_big" --splits 2 --group ${{ matrix.group }} + run: SPEC=2 pytest --maxfail=10 -n auto --durations=30 --ignore=test/models --ignore test/test_custom_kernel.py --ignore test/unit/test_hashing.py --timeout 60 -k "not test_setitem_big" --splits 2 --group ${{ matrix.group }} fuzzing: name: Fuzzing @@ -470,6 +473,8 @@ jobs: name: Test LLM runs-on: ubuntu-24.04 timeout-minutes: 15 + env: + IGNORE_OOB: 1 steps: - name: Checkout Code uses: actions/checkout@v4 @@ -584,8 +589,6 @@ jobs: name: Linux (WebGPU) runs-on: ubuntu-22.04 timeout-minutes: 20 - env: - IGNORE_OOB: 0 steps: - name: Checkout Code uses: actions/checkout@v4 @@ -822,7 +825,6 @@ jobs: NV_PTX: 1 NV: 1 FORWARD_ONLY: 1 - IGNORE_OOB: 0 run: | python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20 - name: Run process replay tests @@ -832,8 +834,6 @@ jobs: name: MacOS (WebGPU) runs-on: macos-14 timeout-minutes: 10 - env: - IGNORE_OOB: 0 steps: - name: Checkout Code uses: actions/checkout@v4 @@ -911,8 +911,6 @@ jobs: name: Windows (${{ matrix.backend }}) runs-on: windows-latest timeout-minutes: 15 - env: - IGNORE_OOB: 0 steps: - name: Checkout Code uses: actions/checkout@v4 From 051fe6c8bcc822d77c302b9dc5014d3afe3db0e5 Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 31 Dec 2025 13:16:34 -0500 Subject: [PATCH 02/25] less toposort iteration in oob validate (#13929) --- tinygrad/uop/validate.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tinygrad/uop/validate.py b/tinygrad/uop/validate.py index 02552cb495..42a50d51e3 100644 --- a/tinygrad/uop/validate.py +++ b/tinygrad/uop/validate.py @@ -67,11 +67,10 @@ def validate_index(buf:UOp, idx:UOp, gate:UOp|None=None): # We can use UOp min/max to do a faster check, but it can give false positive since its not an exact bound and doesn't consider the mask if 0<=idx.vmin and idx.vmax= 4.12.4, use IGNORE_OOB=1 to disable, or \"pip install 'z3-solver>=4.12.4\"") solver = z3.Solver(ctx=z3.Context()) From 13973e4dea91fd16e2467b7542999641daedb9da Mon Sep 17 00:00:00 2001 From: Christopher Milan Date: Wed, 31 Dec 2025 10:22:38 -0800 Subject: [PATCH 03/25] refactor image pitch (#13928) --- test/device/test_qcom.py | 74 --------------------------------- test/test_image_dtype.py | 60 ++++++++++++++++++++++++++ tinygrad/dtype.py | 11 ++++- tinygrad/helpers.py | 1 + tinygrad/runtime/graph/hcq.py | 2 +- tinygrad/runtime/ops_qcom.py | 58 ++++++++++---------------- tinygrad/runtime/support/hcq.py | 7 ++-- 7 files changed, 99 insertions(+), 114 
deletions(-) delete mode 100644 test/device/test_qcom.py diff --git a/test/device/test_qcom.py b/test/device/test_qcom.py deleted file mode 100644 index 827a364d1d..0000000000 --- a/test/device/test_qcom.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python -import unittest -from tinygrad.device import Device, BufferSpec -from tinygrad.dtype import dtypes - -@unittest.skipUnless(Device.DEFAULT == "QCOM", "QCOM device required to run") -class TestQcom(unittest.TestCase): - def test_image_pitch(self): - dev = Device["QCOM"] - - def __validate(imgdt, expected_pitch): - img = dev.allocator.alloc(imgdt.shape[0] * imgdt.shape[1] * 16, options:=BufferSpec(image=imgdt)) - pitch = img.texture_info.pitch - assert pitch == expected_pitch, f"Failed pitch for image: {imgdt}. Got 0x{pitch:X}, expected 0x{expected_pitch:X}" - dev.allocator.free(img, imgdt.shape[0] * imgdt.shape[1] * 16, options) - - # Match opencl pitches for perf - __validate(dtypes.imageh((1, 201)), 0x680) - __validate(dtypes.imageh((16, 216)), 0x700) - __validate(dtypes.imageh((16, 9)), 0x80) - __validate(dtypes.imageh((48, 64)), 0x200) - __validate(dtypes.imageh((32, 128)), 0x400) - __validate(dtypes.imageh((96, 128)), 0x400) - __validate(dtypes.imageh((64, 256)), 0x840) - __validate(dtypes.imageh((64, 9)), 0x80) - __validate(dtypes.imageh((192, 256)), 0x840) - __validate(dtypes.imageh((64, 768)), 0x1840) - __validate(dtypes.imageh((256, 49)), 0x1C0) - __validate(dtypes.imageh((128, 9)), 0x80) - __validate(dtypes.imageh((16, 1024)), 0x2080) - __validate(dtypes.imageh((64, 512)), 0x1040) - __validate(dtypes.imageh((16, 512)), 0x1080) - __validate(dtypes.imageh((132, 64)), 0x200) - __validate(dtypes.imageh((4, 512)), 0x1200) - __validate(dtypes.imageh((8, 512)), 0x1100) - __validate(dtypes.imageh((128, 128)), 0x400) - __validate(dtypes.imageh((32, 512)), 0x1040) - __validate(dtypes.imageh((26, 64)), 0x200) - __validate(dtypes.imageh((32, 516)), 0x1040) - __validate(dtypes.imageh((32, 1024)), 0x2040) - __validate(dtypes.imageh((16, 2048)), 0x4080) - __validate(dtypes.imageh((8, 2048)), 0x4100) - __validate(dtypes.imageh((4, 4096)), 0x8200) - - __validate(dtypes.imagef((16, 49)), 0x380) - __validate(dtypes.imagef((16, 1024)), 0x4080) - __validate(dtypes.imagef((256, 64)), 0x400) - __validate(dtypes.imagef((64, 512)), 0x2040) - __validate(dtypes.imagef((16, 512)), 0x2080) - __validate(dtypes.imagef((132, 64)), 0x400) - __validate(dtypes.imagef((4, 512)), 0x2200) - __validate(dtypes.imagef((4, 16)), 0x200) - __validate(dtypes.imagef((2, 16)), 0x400) - __validate(dtypes.imagef((8, 512)), 0x2100) - __validate(dtypes.imagef((12, 64)), 0x400) - __validate(dtypes.imagef((3, 32)), 0x400) - __validate(dtypes.imagef((128, 128)), 0x840) - __validate(dtypes.imagef((32, 512)), 0x2040) - __validate(dtypes.imagef((8, 3072)), 0xC100) - __validate(dtypes.imagef((4, 2048)), 0x8200) - __validate(dtypes.imagef((4, 1024)), 0x4200) - __validate(dtypes.imagef((4, 4096)), 0x10200) - __validate(dtypes.imagef((10, 384)), 0x1900) - __validate(dtypes.imagef((24, 64)), 0x400) - __validate(dtypes.imagef((128, 12)), 0xC0) - __validate(dtypes.imagef((10, 24)), 0x200) - __validate(dtypes.imagef((1, 129)), 0x840) - __validate(dtypes.imagef((1, 32)), 0x200) - __validate(dtypes.imagef((1, 64)), 0x400) - __validate(dtypes.imagef((1, 1239)), 0x4D80) - __validate(dtypes.imagef((1, 1)), 0x40) - -if __name__ == "__main__": - unittest.main() diff --git a/test/test_image_dtype.py b/test/test_image_dtype.py index 1ec95939be..ecf37db581 100644 --- a/test/test_image_dtype.py +++ 
b/test/test_image_dtype.py @@ -44,6 +44,66 @@ class TestImageCopy(unittest.TestCase): @unittest.skipUnless(REAL_DEV in IMAGE_SUPPORTED_DEVICES, "Images not supported") class TestImageDType(unittest.TestCase): + def test_image_pitch(self): + def __validate(imgdt, expected_pitch): + assert imgdt.pitch == expected_pitch, f"Failed pitch for image: {imgdt}. Got 0x{imgdt.pitch:X}, expected 0x{expected_pitch:X}" + + # Match opencl pitches for perf + __validate(dtypes.imageh((1, 201)), 0x680) + __validate(dtypes.imageh((16, 216)), 0x700) + __validate(dtypes.imageh((16, 9)), 0x80) + __validate(dtypes.imageh((48, 64)), 0x200) + __validate(dtypes.imageh((32, 128)), 0x400) + __validate(dtypes.imageh((96, 128)), 0x400) + __validate(dtypes.imageh((64, 256)), 0x840) + __validate(dtypes.imageh((64, 9)), 0x80) + __validate(dtypes.imageh((192, 256)), 0x840) + __validate(dtypes.imageh((64, 768)), 0x1840) + __validate(dtypes.imageh((256, 49)), 0x1C0) + __validate(dtypes.imageh((128, 9)), 0x80) + __validate(dtypes.imageh((16, 1024)), 0x2080) + __validate(dtypes.imageh((64, 512)), 0x1040) + __validate(dtypes.imageh((16, 512)), 0x1080) + __validate(dtypes.imageh((132, 64)), 0x200) + __validate(dtypes.imageh((4, 512)), 0x1200) + __validate(dtypes.imageh((8, 512)), 0x1100) + __validate(dtypes.imageh((128, 128)), 0x400) + __validate(dtypes.imageh((32, 512)), 0x1040) + __validate(dtypes.imageh((26, 64)), 0x200) + __validate(dtypes.imageh((32, 516)), 0x1040) + __validate(dtypes.imageh((32, 1024)), 0x2040) + __validate(dtypes.imageh((16, 2048)), 0x4080) + __validate(dtypes.imageh((8, 2048)), 0x4100) + __validate(dtypes.imageh((4, 4096)), 0x8200) + + __validate(dtypes.imagef((16, 49)), 0x380) + __validate(dtypes.imagef((16, 1024)), 0x4080) + __validate(dtypes.imagef((256, 64)), 0x400) + __validate(dtypes.imagef((64, 512)), 0x2040) + __validate(dtypes.imagef((16, 512)), 0x2080) + __validate(dtypes.imagef((132, 64)), 0x400) + __validate(dtypes.imagef((4, 512)), 0x2200) + __validate(dtypes.imagef((4, 16)), 0x200) + __validate(dtypes.imagef((2, 16)), 0x400) + __validate(dtypes.imagef((8, 512)), 0x2100) + __validate(dtypes.imagef((12, 64)), 0x400) + __validate(dtypes.imagef((3, 32)), 0x400) + __validate(dtypes.imagef((128, 128)), 0x840) + __validate(dtypes.imagef((32, 512)), 0x2040) + __validate(dtypes.imagef((8, 3072)), 0xC100) + __validate(dtypes.imagef((4, 2048)), 0x8200) + __validate(dtypes.imagef((4, 1024)), 0x4200) + __validate(dtypes.imagef((4, 4096)), 0x10200) + __validate(dtypes.imagef((10, 384)), 0x1900) + __validate(dtypes.imagef((24, 64)), 0x400) + __validate(dtypes.imagef((128, 12)), 0xC0) + __validate(dtypes.imagef((10, 24)), 0x200) + __validate(dtypes.imagef((1, 129)), 0x840) + __validate(dtypes.imagef((1, 32)), 0x200) + __validate(dtypes.imagef((1, 64)), 0x400) + __validate(dtypes.imagef((1, 1239)), 0x4D80) + __validate(dtypes.imagef((1, 1)), 0x40) + def test_image_and_back(self): data = Tensor.randn(9*27*4).realize() tst = data.numpy() diff --git a/tinygrad/dtype.py b/tinygrad/dtype.py index 454d785738..7b37a321c2 100644 --- a/tinygrad/dtype.py +++ b/tinygrad/dtype.py @@ -2,7 +2,7 @@ from __future__ import annotations from typing import Final, ClassVar, Callable, Literal import math, struct, ctypes, functools from dataclasses import dataclass, fields -from tinygrad.helpers import getenv, prod +from tinygrad.helpers import getenv, prod, round_up, next_power2 from enum import Enum, auto class InvalidTypeMetaClass(type): @@ -101,6 +101,15 @@ class ImageDType(PtrDType): assert addrspace == AddrSpace.GLOBAL, 
"images can't be local" return self def __repr__(self): return f"dtypes.{self.name}({self.shape})" + (f'.vec({self.v})' if self.v != 1 else '') + @property + def pitch(self): + imgw, imgh, itemsize_log = self.shape[1], self.shape[0], int(math.log2(self.itemsize)) + pitchalign = max(6, 11 - int(math.log2(imgh))) if imgh > 1 else 6 + align_up = max(1, (8 // itemsize_log + 1) - imgh // 32) if pitchalign == 6 else (2 ** (pitchalign - itemsize_log - 2)) + + granularity = 128 if self.itemsize == 4 else 256 + pitch_add = (1 << pitchalign) if min(next_power2(imgw), round_up(imgw, granularity)) - align_up + 1 <= imgw and imgw > granularity//2 else 0 + return round_up(imgw * 4 * self.itemsize, 1 << pitchalign) + pitch_add class dtypes: @staticmethod diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py index 5e08ef2fa4..aca12c4d3b 100644 --- a/tinygrad/helpers.py +++ b/tinygrad/helpers.py @@ -50,6 +50,7 @@ def strip_parens(fst:str) -> str: return fst[1:-1] if fst and fst[0]=='(' and fs def ceildiv(num, amt): return int(ret) if isinstance((ret:=-(num//-amt)), float) else ret def round_up(num:int, amt:int) -> int: return (num+amt-1)//amt * amt def round_down(num:int, amt:int) -> int: return -round_up(-num, amt) +def next_power2(x): return 1 if x == 0 else 1 << (x - 1).bit_length() # cstyle div and mod def cdiv(x:int, y:int) -> int: return abs(x)//abs(y)*(1,-1)[x*y<0] if y != 0 else 0 def cmod(x:int, y:int) -> int: return x-cdiv(x,y)*y diff --git a/tinygrad/runtime/graph/hcq.py b/tinygrad/runtime/graph/hcq.py index 6eaf1fa7b9..ec62d829c0 100644 --- a/tinygrad/runtime/graph/hcq.py +++ b/tinygrad/runtime/graph/hcq.py @@ -22,7 +22,7 @@ class HCQGraph(MultiGraphRunner): for (j,i), input_idx in self.input_replace.items(): x = self.input_replace_to_var.setdefault((j,i), UOp.variable(f"input_{input_idx}", 0, 0xffffffffffffffff, dtype=dtypes.uint64)) - self.hcq_bufs[j][i] = HCQBuffer(x, self.hcq_bufs[j][i].size, texture_info=self.hcq_bufs[j][i].texture_info) # Create fake buffer with variable + self.hcq_bufs[j][i] = HCQBuffer(x, self.hcq_bufs[j][i].size, image=self.hcq_bufs[j][i].image) # Create fake buffer with variable # Allocate kernel args. 
kernargs_size: dict[Compiled, int] = collections.defaultdict(int) diff --git a/tinygrad/runtime/ops_qcom.py b/tinygrad/runtime/ops_qcom.py index a27f9b98a1..deec87f582 100644 --- a/tinygrad/runtime/ops_qcom.py +++ b/tinygrad/runtime/ops_qcom.py @@ -10,7 +10,7 @@ from tinygrad.runtime.ops_cl import CLCompiler, CLDevice from tinygrad.renderer.cstyle import QCOMRenderer from tinygrad.renderer.nir import IR3Renderer from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, prod, fromimport, cpu_profile, lo32, PROFILE, suppress_finalizing -from tinygrad.helpers import flatten, QCOM_IR3, QCOM_CC +from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC from tinygrad.runtime.support.system import System if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # pylint: disable=unused-import @@ -25,7 +25,7 @@ def _qreg_exec(__reg, __val=0, **kwargs): return __val qreg: Any = type("QREG", (object,), {name[4:].lower(): functools.partial(_qreg_exec, name) for name in mesa.__dict__.keys() if name[:4] == 'REG_'}) -def next_power2(x): return 1 if x == 0 else 1 << (x - 1).bit_length() +def ctz(v): return (v & -v).bit_length() - 1 def parity(val: int): for i in range(4,1,-1): val ^= val >> (1 << i) @@ -192,9 +192,8 @@ class QCOMArgsState(HCQArgsState): super().__init__(buf, prg, bufs, vals=vals) ctypes.memset(cast(int, self.buf.va_addr), 0, prg.kernargs_alloc_size) - ubos, uavs = [b for b in bufs if b.texture_info is None], [b for b in bufs if b.texture_info is not None] + ubos, uavs = [b for b in bufs if b.image is None], [b for b in bufs if b.image is not None] ibos, texs = (uavs, []) if prg.tex_cnt == 0 else (uavs[:-prg.tex_cnt], uavs[-prg.tex_cnt:]) - for cnst_val,cnst_off,cnst_sz in prg.consts_info: to_mv(self.buf.va_addr + cnst_off, cnst_sz)[:] = cnst_val.to_bytes(cnst_sz, byteorder='little') if prg.samp_cnt > 0: to_mv(self.buf.va_addr + prg.samp_off, len(prg.samplers) * 4).cast('I')[:] = array.array('I', prg.samplers) @@ -205,8 +204,15 @@ class QCOMArgsState(HCQArgsState): for i, b in enumerate(ubos): self.bind_sints_to_buf(b.va_addr, buf=self.buf, fmt='Q', offset=prg.buf_offs[i]) for i, v in enumerate(vals): self.bind_sints_to_buf(v, buf=self.buf, fmt='I', offset=prg.buf_offs[i+len(ubos)]) - self.bind_sints_to_buf(*flatten([b.texture_info.desc + ([0] * 8) for b in texs]), buf=self.buf, fmt='I', offset=prg.tex_off) - self.bind_sints_to_buf(*flatten([b.texture_info.ibo + ([0] * 8) for b in ibos]), buf=self.buf, fmt='I', offset=prg.ibo_off) + def _tex(b, ibo=False): + fmt = mesa.FMT6_32_32_32_32_FLOAT if b.image.itemsize == 4 else mesa.FMT6_16_16_16_16_FLOAT + return [qreg.a6xx_tex_const_0(fmt=fmt) if ibo else qreg.a6xx_tex_const_0(0x8, swiz_x=0, swiz_y=1, swiz_z=2, swiz_w=3, fmt=fmt), + qreg.a6xx_tex_const_1(width=b.image.shape[1], height=b.image.shape[0]), + qreg.a6xx_tex_const_2(type=mesa.A6XX_TEX_2D, pitch=b.image.pitch, pitchalign=ctz(b.image.pitch)-6), 0, *data64_le(b.va_addr), + qreg.a6xx_tex_const_6(plane_pitch=0x400000), qreg.a6xx_tex_const_7(13), 0, 0, 0, 0, 0, 0, 0, 0] + + self.bind_sints_to_buf(*flatten(map(_tex, texs)), buf=self.buf, fmt='I', offset=prg.tex_off) + self.bind_sints_to_buf(*flatten(map(functools.partial(_tex, ibo=True), ibos)), buf=self.buf, fmt='I', offset=prg.ibo_off) class QCOMProgram(HCQProgram): def __init__(self, dev: QCOMDevice, name: str, lib: bytes): @@ -305,28 +311,10 @@ class QCOMTextureInfo: self.pitch, self.real_stride, self.desc, self.ibo = pitch, real_stride, desc, ibo class QCOMAllocator(HCQAllocatorBase): - 
def _alloc(self, size:int, options:BufferSpec) -> HCQBuffer: + def _alloc(self, size:int, opts:BufferSpec) -> HCQBuffer: # Recalculate real size for texture - if options.image is not None: - imgw, imgh, itemsize_log = options.image.shape[1], options.image.shape[0], int(math.log2(options.image.itemsize)) - pitchalign = max(6, 11 - int(math.log2(imgh))) if imgh > 1 else 6 - align_up = max(1, (8 // itemsize_log + 1) - imgh // 32) if pitchalign == 6 else (2 ** (pitchalign - itemsize_log - 2)) - - granularity = 128 if options.image.itemsize == 4 else 256 - pitch_add = (1 << pitchalign) if min(next_power2(imgw), round_up(imgw, granularity)) - align_up + 1 <= imgw and imgw > granularity//2 else 0 - pitch = round_up((real_stride:=imgw * 4 * options.image.itemsize), 1 << pitchalign) + pitch_add - size = pitch * imgh - - buf = self.dev._gpu_map(options.external_ptr, size) if options.external_ptr else self.dev._gpu_alloc(size) - - if options.image is not None: - tex_fmt = mesa.FMT6_32_32_32_32_FLOAT if options.image.itemsize == 4 else mesa.FMT6_16_16_16_16_FLOAT - desc = [qreg.a6xx_tex_const_0(0x8, swiz_x=0, swiz_y=1, swiz_z=2, swiz_w=3, fmt=tex_fmt), qreg.a6xx_tex_const_1(width=imgw, height=imgh), - qreg.a6xx_tex_const_2(type=mesa.A6XX_TEX_2D, pitch=pitch, pitchalign=pitchalign-6), 0, - *data64_le(buf.va_addr), qreg.a6xx_tex_const_6(plane_pitch=0x400000), qreg.a6xx_tex_const_7(13)] - - buf.texture_info = QCOMTextureInfo(pitch, real_stride, desc, [desc[0] & (~0xffff), *desc[1:len(desc)]]) - return buf + if opts.image is not None: size = opts.image.pitch* opts.image.shape[0] + return self.dev._gpu_map(opts.external_ptr, size, image=opts.image) if opts.external_ptr else self.dev._gpu_alloc(size, image=opts.image) def _do_copy(self, src_addr, dest_addr, src_size, real_size, src_stride, dest_stride, prof_text, dest_off=0, src_off=0): with cpu_profile(prof_text, self.dev.device, is_copy=True): @@ -335,13 +323,13 @@ class QCOMAllocator(HCQAllocatorBase): src_off, dest_off = src_off+src_stride, dest_off+dest_stride def _copyin(self, dest:HCQBuffer, src:memoryview): - stride, pitch = (src.nbytes, src.nbytes) if (ti:=cast(QCOMTextureInfo, dest.texture_info)) is None else (ti.real_stride, ti.pitch) + stride, pitch = (dest.image.shape[1] * 4 * dest.image.itemsize, dest.image.pitch) if dest.image else (src.nbytes, src.nbytes) self._do_copy(mv_address(src), dest.cpu_view().addr, src.nbytes, stride, stride, pitch, f"TINY -> {self.dev.device}") def _copyout(self, dest:memoryview, src:HCQBuffer): self.dev.synchronize() - stride, pitch = (src.size, src.size) if (ti:=cast(QCOMTextureInfo, src.texture_info)) is None else (ti.real_stride, ti.pitch) + stride, pitch = (src.image.shape[1] * 4 * src.image.itemsize, src.image.pitch) if src.image else (src.size, src.size) self._do_copy(src.cpu_view().addr, mv_address(dest), src.size, stride, pitch, stride, f"{self.dev.device} -> TINY") def _as_buffer(self, src:HCQBuffer) -> memoryview: @@ -388,7 +376,7 @@ class QCOMDevice(HCQCompiled): super().__init__(device, QCOMAllocator(self), compilers, functools.partial(QCOMProgram, self), QCOMSignal, functools.partial(QCOMComputeQueue, self), None) - def _gpu_alloc(self, size:int, flags:int=0, uncached=False, fill_zeroes=False) -> HCQBuffer: + def _gpu_alloc(self, size:int, flags:int=0, uncached=False, fill_zeroes=False, **kwargs) -> HCQBuffer: flags |= flag("KGSL_MEMALIGN", alignment_hint:=12) | kgsl.KGSL_MEMFLAGS_USE_CPU_MAP if uncached: flags |= flag("KGSL_CACHEMODE", kgsl.KGSL_CACHEMODE_UNCACHED) @@ -396,15 +384,15 @@ class 
QCOMDevice(HCQCompiled): va_addr = self.fd.mmap(0, bosz, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED, alloc.id * 0x1000) if fill_zeroes: ctypes.memset(va_addr, 0, size) - return HCQBuffer(va_addr=va_addr, size=size, meta=(alloc, True), view=MMIOInterface(va_addr, size, fmt='B'), owner=self) + return HCQBuffer(va_addr=va_addr, size=size, meta=(alloc, True), view=MMIOInterface(va_addr, size, fmt='B'), owner=self, **kwargs) - def _gpu_map(self, ptr:int, size:int) -> HCQBuffer: + def _gpu_map(self, ptr:int, size:int, **kwargs) -> HCQBuffer: ptr_aligned, size_aligned = (ptr & ~0xfff), round_up(size + (ptr & 0xfff), 0x1000) try: - mapinfo = kgsl.IOCTL_KGSL_MAP_USER_MEM(self.fd, hostptr=ptr_aligned, len=size_aligned, memtype=kgsl.KGSL_USER_MEM_TYPE_ADDR) - return HCQBuffer(mapinfo.gpuaddr + (ptr - ptr_aligned), size=size, meta=(mapinfo, False), view=MMIOInterface(ptr, size, fmt='B'), owner=self) + mi = kgsl.IOCTL_KGSL_MAP_USER_MEM(self.fd, hostptr=ptr_aligned, len=size_aligned, memtype=kgsl.KGSL_USER_MEM_TYPE_ADDR) + return HCQBuffer(mi.gpuaddr + (ptr - ptr_aligned), size=size, meta=(mi, False), view=MMIOInterface(ptr, size, fmt='B'), owner=self, **kwargs) except OSError as e: - if e.errno == 14: return HCQBuffer(va_addr=ptr, size=size, meta=(None, False), view=MMIOInterface(ptr, size, fmt='B'), owner=self) + if e.errno == 14: return HCQBuffer(va_addr=ptr, size=size, meta=(None, False), view=MMIOInterface(ptr, size, fmt='B'), owner=self, **kwargs) raise RuntimeError("Failed to map external pointer to GPU memory") from e def _gpu_free(self, mem:HCQBuffer): diff --git a/tinygrad/runtime/support/hcq.py b/tinygrad/runtime/support/hcq.py index f64dbd6188..a3bfbe1315 100644 --- a/tinygrad/runtime/support/hcq.py +++ b/tinygrad/runtime/support/hcq.py @@ -8,6 +8,7 @@ from tinygrad.device import BufferSpec, Compiled, LRUAllocator, ProfileDeviceEve from tinygrad.uop.ops import sym_infer, sint, UOp from tinygrad.runtime.autogen import libc from tinygrad.runtime.support.memory import BumpAllocator +from tinygrad.dtype import ImageDType class MMIOInterface: def __init__(self, addr:int, nbytes:int, fmt='B'): self.mv, self.addr, self.nbytes, self.fmt = to_mv(addr, nbytes).cast(fmt), addr, nbytes, fmt @@ -455,14 +456,14 @@ class HCQCompiled(Compiled, Generic[SignalType]): if hasattr(self, 'iface') and hasattr(self.iface, 'device_fini'): self.iface.device_fini() class HCQBuffer: - def __init__(self, va_addr:sint, size:int, texture_info:Any=None, meta:Any=None, _base:HCQBuffer|None=None, view:MMIOInterface|None=None, + def __init__(self, va_addr:sint, size:int, image:ImageDType|None=None, meta:Any=None, _base:HCQBuffer|None=None, view:MMIOInterface|None=None, owner:HCQCompiled|None=None): - self.va_addr, self.size, self.texture_info, self.meta, self._base, self.view = va_addr, size, texture_info, meta, _base, view + self.va_addr, self.size, self.image, self.meta, self._base, self.view = va_addr, size, image, meta, _base, view self._devs, self.owner = ([owner] if owner is not None else []), owner self._mappings:dict[HCQCompiled, HCQBuffer] = {} # mapping to the other devices def offset(self, offset:int=0, size:int|None=None) -> HCQBuffer: - return HCQBuffer(self.va_addr+offset, size or (self.size - offset), owner=self.owner, texture_info=self.texture_info, meta=self.meta, + return HCQBuffer(self.va_addr+offset, size or (self.size - offset), owner=self.owner, image=self.image, meta=self.meta, _base=self._base or self, view=(self.view.view(offset=offset, size=size) if self.view is not None else None)) def 
cpu_view(self) -> MMIOInterface: From f14428090fd5d9847827587e1ef0ff9a971e5df9 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Wed, 31 Dec 2025 13:32:25 -0500 Subject: [PATCH 04/25] assembly/amd: speed up emulator (#13932) --- extra/assembly/amd/dsl.py | 88 +++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 35 deletions(-) diff --git a/extra/assembly/amd/dsl.py b/extra/assembly/amd/dsl.py index fdbc7b462c..cde5ef6984 100644 --- a/extra/assembly/amd/dsl.py +++ b/extra/assembly/amd/dsl.py @@ -1,48 +1,56 @@ # library for RDNA3 assembly DSL # mypy: ignore-errors from __future__ import annotations -import struct, math +import struct, math, re from enum import IntEnum +from functools import cache, cached_property from typing import overload, Annotated, TypeVar, Generic from extra.assembly.amd.autogen.rdna3.enum import (VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp, VOPDOp, SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, SMEMOp, DSOp, FLATOp, MUBUFOp, MTBUFOp, MIMGOp, VINTERPOp) # Common masks and bit conversion functions MASK32, MASK64 = 0xffffffff, 0xffffffffffffffff -def _f32(i): return struct.unpack(" 0 else 0xff800000 - try: return struct.unpack(" 0 else 0xff800000 def _sext(v, b): return v - (1 << b) if v & (1 << (b - 1)) else v -def _f16(i): return struct.unpack(" 0 else 0xfc00 - try: return struct.unpack(" 0 else 0xfc00 -def _f64(i): return struct.unpack(" 0 else 0xfff0000000000000 - try: return struct.unpack(" 0 else 0xfff0000000000000 # Instruction spec - register counts and dtypes derived from instruction names -import re _REGS = {'B32': 1, 'B64': 2, 'B96': 3, 'B128': 4, 'B256': 8, 'B512': 16, 'F32': 1, 'I32': 1, 'U32': 1, 'F64': 2, 'I64': 2, 'U64': 2, 'F16': 1, 'I16': 1, 'U16': 1, 'B16': 1, 'I8': 1, 'U8': 1, 'B8': 1} +_CVT_RE = re.compile(r'CVT_([FIUB]\d+)_([FIUB]\d+)$') +_MAD_MUL_RE = re.compile(r'(?:MAD|MUL)_([IU]\d+)_([IU]\d+)$') +_PACK_RE = re.compile(r'PACK_([FIUB]\d+)_([FIUB]\d+)$') +_DST_SRC_RE = re.compile(r'_([FIUB]\d+)_([FIUB]\d+)$') +_SINGLE_RE = re.compile(r'_([FIUB](?:32|64|16|8|96|128|256|512))$') +@cache def _suffix(name: str) -> tuple[str | None, str | None]: name = name.upper() - if m := re.search(r'CVT_([FIUB]\d+)_([FIUB]\d+)$', name): return m.group(1), m.group(2) - if m := re.search(r'(?:MAD|MUL)_([IU]\d+)_([IU]\d+)$', name): return m.group(1), m.group(2) - if m := re.search(r'PACK_([FIUB]\d+)_([FIUB]\d+)$', name): return m.group(1), m.group(2) - # Generic dst_src pattern: S_BCNT0_I32_B64, S_BITREPLICATE_B64_B32, V_FREXP_EXP_I32_F64, etc. 
- if m := re.search(r'_([FIUB]\d+)_([FIUB]\d+)$', name): return m.group(1), m.group(2) - if m := re.search(r'_([FIUB](?:32|64|16|8|96|128|256|512))$', name): return m.group(1), m.group(1) + if m := _CVT_RE.search(name): return m.group(1), m.group(2) + if m := _MAD_MUL_RE.search(name): return m.group(1), m.group(2) + if m := _PACK_RE.search(name): return m.group(1), m.group(2) + if m := _DST_SRC_RE.search(name): return m.group(1), m.group(2) + if m := _SINGLE_RE.search(name): return m.group(1), m.group(1) return None, None _SPECIAL_REGS = { 'V_LSHLREV_B64': (2, 1, 2, 1), 'V_LSHRREV_B64': (2, 1, 2, 1), 'V_ASHRREV_I64': (2, 1, 2, 1), @@ -71,27 +79,33 @@ _SPECIAL_DTYPE = { 'V_QSAD_PK_U16_U8': ('B64', 'B64', 'B64', 'B64'), 'V_MQSAD_PK_U16_U8': ('B64', 'B64', 'B64', 'B64'), 'V_MQSAD_U32_U8': ('B128', 'B64', 'B64', 'B128'), } +@cache def spec_regs(name: str) -> tuple[int, int, int, int]: - name = name.upper() - if name in _SPECIAL_REGS: return _SPECIAL_REGS[name] - if 'SAD' in name and 'U8' in name and 'QSAD' not in name and 'MQSAD' not in name: return 1, 1, 1, 1 + uname = name.upper() + if uname in _SPECIAL_REGS: return _SPECIAL_REGS[uname] + if 'SAD' in uname and 'U8' in uname and 'QSAD' not in uname and 'MQSAD' not in uname: return 1, 1, 1, 1 dst_suf, src_suf = _suffix(name) return _REGS.get(dst_suf, 1), _REGS.get(src_suf, 1), _REGS.get(src_suf, 1), _REGS.get(src_suf, 1) +@cache def spec_dtype(name: str) -> tuple[str | None, str | None, str | None, str | None]: - name = name.upper() - if name in _SPECIAL_DTYPE: return _SPECIAL_DTYPE[name] - if 'SAD' in name and ('U8' in name or 'U16' in name) and 'QSAD' not in name and 'MQSAD' not in name: return 'U32', 'U32', 'U32', 'U32' - if '_CMP_' in name or '_CMPX_' in name: + uname = name.upper() + if uname in _SPECIAL_DTYPE: return _SPECIAL_DTYPE[uname] + if 'SAD' in uname and ('U8' in uname or 'U16' in uname) and 'QSAD' not in uname and 'MQSAD' not in uname: return 'U32', 'U32', 'U32', 'U32' + if '_CMP_' in uname or '_CMPX_' in uname: dst_suf, src_suf = _suffix(name) - return 'EXEC' if '_CMPX_' in name else 'VCC', src_suf, src_suf, None + return 'EXEC' if '_CMPX_' in uname else 'VCC', src_suf, src_suf, None dst_suf, src_suf = _suffix(name) return dst_suf, src_suf, src_suf, src_suf +_F16_RE = re.compile(r'_[FIUB]16(?:_|$)') +_F64_RE = re.compile(r'_[FIUB]64(?:_|$)') +@cache def spec_is_16bit(name: str) -> bool: - name = name.upper() - if 'SAD' in name or 'PACK' in name or '_PK_' in name or 'SAT_PK' in name or 'DOT2' in name: return False - if '_F32' in name or '_I32' in name or '_U32' in name or '_B32' in name: return False # mixed ops like V_DOT2ACC_F32_F16 - return bool(re.search(r'_[FIUB]16(?:_|$)', name)) -def spec_is_64bit(name: str) -> bool: return bool(re.search(r'_[FIUB]64(?:_|$)', name.upper())) + uname = name.upper() + if 'SAD' in uname or 'PACK' in uname or '_PK_' in uname or 'SAT_PK' in uname or 'DOT2' in uname: return False + if '_F32' in uname or '_I32' in uname or '_U32' in uname or '_B32' in uname: return False + return bool(_F16_RE.search(uname)) +@cache +def spec_is_64bit(name: str) -> bool: return bool(_F64_RE.search(name.upper())) _3SRC = {'FMA', 'MAD', 'MIN3', 'MAX3', 'MED3', 'DIV_FIX', 'DIV_FMAS', 'DIV_SCALE', 'SAD', 'LERP', 'ALIGN', 'CUBE', 'BFE', 'BFI', 'PERM_B32', 'PERMLANE', 'CNDMASK', 'XOR3', 'OR3', 'ADD3', 'LSHL_OR', 'AND_OR', 'LSHL_ADD', 'ADD_LSHL', 'XAD', 'MAXMIN', 'MINMAX', 'DOT2', 'DOT4', 'DOT8', 'WMMA', 'CVT_PK_U8', 'MULLIT', 'CO_CI'} @@ -495,21 +509,25 @@ class Inst: assert cls_name in self._enum_map, f"no enum map for 
{cls_name}" return self._enum_map[cls_name](val) - @property + @cached_property def op_name(self) -> str: op = self.op return op.name if hasattr(op, 'name') else '' - def dst_regs(self) -> int: return spec_regs(self.op_name)[0] - def src_regs(self, n: int) -> int: return spec_regs(self.op_name)[n + 1] + @cached_property + def _spec_regs(self) -> tuple[int, int, int, int]: return spec_regs(self.op_name) + @cached_property + def _spec_dtype(self) -> tuple[str | None, str | None, str | None, str | None]: return spec_dtype(self.op_name) + def dst_regs(self) -> int: return self._spec_regs[0] + def src_regs(self, n: int) -> int: return self._spec_regs[n + 1] def num_srcs(self) -> int: return spec_num_srcs(self.op_name) - def dst_dtype(self) -> str | None: return spec_dtype(self.op_name)[0] - def src_dtype(self, n: int) -> str | None: return spec_dtype(self.op_name)[n + 1] - def is_src_16(self, n: int) -> bool: return self.src_regs(n) == 1 and is_dtype_16(self.src_dtype(n)) - def is_src_64(self, n: int) -> bool: return self.src_regs(n) == 2 + def dst_dtype(self) -> str | None: return self._spec_dtype[0] + def src_dtype(self, n: int) -> str | None: return self._spec_dtype[n + 1] + def is_src_16(self, n: int) -> bool: return self._spec_regs[n + 1] == 1 and is_dtype_16(self._spec_dtype[n + 1]) + def is_src_64(self, n: int) -> bool: return self._spec_regs[n + 1] == 2 def is_16bit(self) -> bool: return spec_is_16bit(self.op_name) def is_64bit(self) -> bool: return spec_is_64bit(self.op_name) - def is_dst_16(self) -> bool: return self.dst_regs() == 1 and is_dtype_16(self.dst_dtype()) + def is_dst_16(self) -> bool: return self._spec_regs[0] == 1 and is_dtype_16(self._spec_dtype[0]) class Inst32(Inst): pass class Inst64(Inst): pass From b6d08f247d86b4ae0e4d934896a9f409c50e4432 Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 31 Dec 2025 13:37:57 -0500 Subject: [PATCH 05/25] assert z3_xor input type (#13933) --- tinygrad/uop/validate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tinygrad/uop/validate.py b/tinygrad/uop/validate.py index 42a50d51e3..c73647cbf6 100644 --- a/tinygrad/uop/validate.py +++ b/tinygrad/uop/validate.py @@ -11,9 +11,8 @@ try: # IDIV is truncated division but z3 does euclidian division (floor if b>0 ceil otherwise); mod by power of two sometimes uses Ops.AND def z3_cdiv(a, b):return z3.If((a<0), z3.If(0 Date: Wed, 31 Dec 2025 15:02:49 -0500 Subject: [PATCH 06/25] remove unused validate rule (#13936) --- tinygrad/uop/validate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tinygrad/uop/validate.py b/tinygrad/uop/validate.py index c73647cbf6..fb227996f3 100644 --- a/tinygrad/uop/validate.py +++ b/tinygrad/uop/validate.py @@ -33,7 +33,6 @@ try: (UPat(Ops.CONST, dtypes.ints+(dtypes.index,), name="x"), lambda x,ctx: (z3.IntVal(x.arg, ctx=ctx[0].ctx), None)), (UPat(Ops.CONST, dtypes.bool, name="x"), lambda x,ctx: (z3.BoolVal(x.arg, ctx=ctx[0].ctx), None)), # casts from floats create new variables - (UPat(Ops.CAST, dtypes.bool, src=(UPat(dtype=dtypes.floats),), name="x"), lambda x,ctx: (z3.Bool(f"cast{len(ctx[1])}",ctx=ctx[0].ctx), None)), (UPat(Ops.CAST, dtypes.ints+(dtypes.index,), src=(UPat(dtype=dtypes.floats),), name="x"), lambda x,ctx: create_bounded(f"cast{len(ctx[1])}", x.dtype.min, x.dtype.max, ctx[0])), # A comparison between floats introduces a new bool variable From 52acadc16077f2b113998f29a22dfe1525c83800 Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 31 Dec 2025 15:24:20 -0500 Subject: [PATCH 07/25] consolidate IGNORE_OOB=0 tests (#13937) add 
a new unit test file and add more cases --- test/test_uop_graph.py | 137 ------------------------- test/unit/test_validate_oob.py | 179 +++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 137 deletions(-) create mode 100644 test/unit/test_validate_oob.py diff --git a/test/test_uop_graph.py b/test/test_uop_graph.py index 5b8bcd094e..87db0b844b 100644 --- a/test/test_uop_graph.py +++ b/test/test_uop_graph.py @@ -478,143 +478,6 @@ class TestUOpGraph(unittest.TestCase): for u in uops: self.assertNotEqual(u.dtype, dtypes.long) - def test_in_out_of_bounds_access(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(UOp.const(dtypes.int, 0), ptr=True),)) - to_uops_list([ld0]) - ld1 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(UOp.const(dtypes.int, 15), ptr=True),)) - to_uops_list([ld1]) - ld1 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(UOp.const(dtypes.int, 7), ptr=True),)) - to_uops_list([ld1]) - - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(UOp.const(dtypes.int, 42), ptr=True),)) - with self.assertRaises(RuntimeError): to_uops_list([ld0]) - - def test_in_out_of_bounds_access_symbolic(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(Variable("i", 1, 10), ptr=True),)) - to_uops_list([ld0]) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(Variable("i", 0, 15), ptr=True),)) - to_uops_list([ld0]) - - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(Variable("i", 0, 20), ptr=True),)) - with self.assertRaises(RuntimeError): to_uops_list([ld0]) - - def test_in_out_of_bounds_access_gated_store(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), src=(), arg=0) - v = Variable("v", 0, 20) - st0 = UOp(Ops.STORE, dtypes.void, src=(glbl0.index(v.valid(v<16)), UOp.const(dtypes.int, 0))) - to_uops_list([st0]) - - st1 = UOp(Ops.STORE, dtypes.void, (glbl0.index(v.valid(v<20)), v)) - with self.assertRaises(RuntimeError): to_uops_list([st1]) - - @unittest.skip("if not allowed in graph") - def test_in_bounds_access_gated_local(self): - with Context(IGNORE_OOB=0): - # Define buffers - gbuf = UOp(Ops.DEFINE_GLOBAL, dtypes.uint.ptr(400), (), 0) - sbuf = UOp(Ops.DEFINE_LOCAL, dtypes.uint.ptr(8, addrspace=AddrSpace.LOCAL), (), "temp0") - - # Define indices, valids and barrier - gidx = UOp(Ops.SPECIAL, dtypes.int, (UOp.const(dtypes.int, 416),), "gidx0") - lidx = UOp(Ops.SPECIAL, dtypes.int, (UOp.const(dtypes.int, 10),), "lidx0") - - gate = (gidx<400) & (lidx<8) - - local_store = UOp(Ops.STORE, dtypes.void, (sbuf.index(lidx, lidx<8), UOp.const(dtypes.uint, 1))) - - barrier = UOp(Ops.BARRIER, dtypes.void, (local_store,)) - if_barrier = UOp(Ops.IF, dtypes.void, (gate, barrier)) - - # Load from local memory (after the IF/barrier) - local_load = UOp(Ops.LOAD, dtypes.uint, (sbuf.index(lidx, ptr=True), if_barrier)) - - # Store to global memory - global_store = UOp(Ops.STORE, dtypes.void, (gbuf.index(gidx), local_load)) - to_uops_list([global_store]) - - def test_load_with_float_in_index(self): - with Context(IGNORE_OOB=0): - ridx = UOp.range(20, 0) - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) - i = (ridx.cast(dtypes.float)*0.68).trunc().cast(dtypes.int) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(i.valid((0<=i)&(i<16)), ptr=True),)) - to_uops_list([ld0]) - glblfloat = UOp(Ops.DEFINE_GLOBAL, dtypes.float.ptr(20), (), 0) - ldfloat = UOp(Ops.LOAD, dtypes.float, (glblfloat.index(ridx),)) - 
i = (ldfloat+3.14).cast(dtypes.int) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(i, ((0<=i)&(i<16)), ptr=True),)) - - def test_load_cast_to_bool(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(1), (), 0) - ridx = UOp.range(20, 0) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(ridx.valid(ridx.cast(dtypes.bool).logical_not()), ptr=True),)) - to_uops_list([ld0]) - - @unittest.skip("Bool load is not supported yet") - def test_load_mask(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) - mask = UOp(Ops.DEFINE_GLOBAL, dtypes.bool.ptr(16), (), 0) - ridx = UOp.range(20, 0) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(UOp.const(ridx, ridx<16&mask), ptr=True))) - to_uops_list([ld0]) - - def test_out_of_bounds_off_by_one_access(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(UOp.const(dtypes.int, 16), ptr=True),)) - with self.assertRaises(RuntimeError): to_uops_list([ld0]) - - def test_in_out_bounds_access_with_mask(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) - gidx0 = UOp.range(42, 0, AxisType.GLOBAL) - ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(gidx0.valid((5=0)&(ld0<32)), ptr=True),)) - to_uops_list([ld1]) - - ld1 = UOp(Ops.LOAD, dtypes.int, (glbl1.index((ld0*2).valid((ld0>=0)&(ld0<64)), ptr=True),)) - with self.assertRaises(RuntimeError): to_uops_list([ld1]) - - def test_bounds_with_loaded_bool(self): - with Context(IGNORE_OOB=0): - glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.bool.ptr(16), (), 0) - glbl1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(8), (), 0) - gidx0 = UOp(Ops.SPECIAL, dtypes.index, (UOp.const(dtypes.index, 16),), "gidx0") - ld0 = glbl0.index(gidx0, ptr=True).load() - ld1 = glbl1.index(gidx0.valid(ld0), ptr=True).load() - with self.assertRaises(RuntimeError): to_uops_list([ld1]) - def test_fold_gated_load(self): glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(), (), 0) glbl1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(), (), 1) diff --git a/test/unit/test_validate_oob.py b/test/unit/test_validate_oob.py new file mode 100644 index 0000000000..8f1fab6291 --- /dev/null +++ b/test/unit/test_validate_oob.py @@ -0,0 +1,179 @@ +import unittest +from tinygrad import dtypes, Variable +from tinygrad.dtype import AddrSpace +from tinygrad.helpers import Context +from tinygrad.uop.ops import Ops, UOp, AxisType +from test.test_uops import to_uops_list + +class TestValidateOOB(unittest.TestCase): + """Test z3 validation of index bounds for different ALU ops and patterns.""" + + # basic index patterns + def test_const_index(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + to_uops_list([buf.index(UOp.const(dtypes.int, 0), ptr=True).load(dtype=dtypes.int)]) # valid + to_uops_list([buf.index(UOp.const(dtypes.int, 15), ptr=True).load(dtype=dtypes.int)]) # valid (last element) + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(UOp.const(dtypes.int, 16), ptr=True).load(dtype=dtypes.int)]) # off by one + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(UOp.const(dtypes.int, 42), ptr=True).load(dtype=dtypes.int)]) # way out + + def test_variable_index(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + to_uops_list([buf.index(Variable("i", 0, 15), ptr=True).load(dtype=dtypes.int)]) # valid + with self.assertRaises(RuntimeError): + 
to_uops_list([buf.index(Variable("i", 0, 20), ptr=True).load(dtype=dtypes.int)]) # oob + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(Variable("i", -5, 10), ptr=True).load(dtype=dtypes.int)]) # negative + + def test_range_with_mask(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + r = UOp.range(42, 0, AxisType.GLOBAL) + to_uops_list([buf.index(r.valid(r < 16), ptr=True).load(dtype=dtypes.int)]) # valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(r.valid(r < 17), ptr=True).load(dtype=dtypes.int)]) # oob + + def test_variable_with_mask(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + v = Variable("v", -5, 80) + to_uops_list([buf.index(v.valid((v >= 0) & (v < 16)), ptr=True).load(dtype=dtypes.int)]) # valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(v.valid(v < 20), ptr=True).load(dtype=dtypes.int)]) # negative not masked + + def test_gated_store(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + v = Variable("v", 0, 20) + to_uops_list([buf.index(v.valid(v < 16)).store(0)]) # valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(v.valid(v < 20)).store(0)]) # oob + + # ALU ops in index + def test_idiv(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + to_uops_list([buf.index(UOp.range(32, 0, AxisType.GLOBAL) // 2, ptr=True).load(dtype=dtypes.int)]) # 0..15 valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(UOp.range(34, 0, AxisType.GLOBAL) // 2, ptr=True).load(dtype=dtypes.int)]) # 0..16 oob + + def test_mod(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + r = UOp.range(100, 0, AxisType.GLOBAL) + to_uops_list([buf.index(r % 16, ptr=True).load(dtype=dtypes.int)]) # 0..15 valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(r % 20, ptr=True).load(dtype=dtypes.int)]) # 0..19 oob + + def test_shr(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + to_uops_list([buf.index(UOp.range(64, 0, AxisType.GLOBAL) >> 2, ptr=True).load(dtype=dtypes.int)]) # 0..15 valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(UOp.range(128, 0, AxisType.GLOBAL) >> 2, ptr=True).load(dtype=dtypes.int)]) # 0..31 oob + + def test_shl(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(64), (), 0) + r = UOp.range(8, 0, AxisType.GLOBAL) + to_uops_list([buf.index(r << 2, ptr=True).load(dtype=dtypes.int)]) # 0..28 valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(r << 4, ptr=True).load(dtype=dtypes.int)]) # 0..112 oob + + def test_and(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + r = UOp.range(100, 0, AxisType.GLOBAL) + to_uops_list([buf.index(r & 15, ptr=True).load(dtype=dtypes.int)]) # 0..15 valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(r & 31, ptr=True).load(dtype=dtypes.int)]) # 0..31 oob + + def test_max(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + to_uops_list([buf.index(Variable("v", -10, 15).maximum(0), ptr=True).load(dtype=dtypes.int)]) # 0..15 valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(Variable("v2", -10, 20).maximum(0), 
ptr=True).load(dtype=dtypes.int)]) # 0..20 oob + + def test_xor_in_mask(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + r = UOp.range(32, 0, AxisType.GLOBAL) + to_uops_list([buf.index(r.valid((r < 8) ^ ((r >= 8) & (r < 16))), ptr=True).load(dtype=dtypes.int)]) # 0..15 valid + with self.assertRaises(RuntimeError): + to_uops_list([buf.index(r.valid((r < 10) ^ (r >= 20)), ptr=True).load(dtype=dtypes.int)]) # 0..9,20..31 oob + + # cast patterns + def test_float_cast_in_index(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + r = UOp.range(20, 0) + i = (r.cast(dtypes.float) * 0.68).trunc().cast(dtypes.int) + to_uops_list([buf.index(i.valid((i >= 0) & (i < 16)), ptr=True).load(dtype=dtypes.int)]) + + def test_bool_cast_in_mask(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(1), (), 0) + r = UOp.range(20, 0) + to_uops_list([buf.index(r.valid(r.cast(dtypes.bool).logical_not()), ptr=True).load(dtype=dtypes.int)]) # only r=0 valid + + # load result as index/mask + def test_load_as_index(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + buf1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(64), (), 1) + r = UOp.range(42, 0, AxisType.GLOBAL) + ld0 = buf0.index(r.valid(r < 8), ptr=True).load(dtype=dtypes.int).cast(dtypes.index) + to_uops_list([buf1.index((ld0 * 2).valid((ld0 >= 0) & (ld0 < 32)), ptr=True).load(dtype=dtypes.int)]) # valid + with self.assertRaises(RuntimeError): + to_uops_list([buf1.index((ld0 * 2).valid((ld0 >= 0) & (ld0 < 64)), ptr=True).load(dtype=dtypes.int)]) # oob + + def test_load_bool_as_mask(self): + with Context(IGNORE_OOB=0, SPEC=2): + buf_bool = UOp(Ops.DEFINE_GLOBAL, dtypes.bool.ptr(16), (), 0) + buf_int = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(8), (), 1) + gidx = UOp(Ops.SPECIAL, dtypes.index, (UOp.const(dtypes.index, 16),), "gidx0") + ld_bool = buf_bool.index(gidx, ptr=True).load() + with self.assertRaises(RuntimeError): + to_uops_list([buf_int.index(gidx.valid(ld_bool), ptr=True).load()]) # gidx 0..15, buf_int size 8 + + # skipped tests (moved from test_uop_graph.py) + @unittest.skip("if not allowed in graph") + def test_in_bounds_access_gated_local(self): + with Context(IGNORE_OOB=0): + # Define buffers + gbuf = UOp(Ops.DEFINE_GLOBAL, dtypes.uint.ptr(400), (), 0) + sbuf = UOp(Ops.DEFINE_LOCAL, dtypes.uint.ptr(8, addrspace=AddrSpace.LOCAL), (), "temp0") + + # Define indices, valids and barrier + gidx = UOp(Ops.SPECIAL, dtypes.int, (UOp.const(dtypes.int, 416),), "gidx0") + lidx = UOp(Ops.SPECIAL, dtypes.int, (UOp.const(dtypes.int, 10),), "lidx0") + + gate = (gidx<400) & (lidx<8) + + local_store = UOp(Ops.STORE, dtypes.void, (sbuf.index(lidx, lidx<8), UOp.const(dtypes.uint, 1))) + + barrier = UOp(Ops.BARRIER, dtypes.void, (local_store,)) + if_barrier = UOp(Ops.IF, dtypes.void, (gate, barrier)) + + # Load from local memory (after the IF/barrier) + local_load = UOp(Ops.LOAD, dtypes.uint, (sbuf.index(lidx, ptr=True), if_barrier)) + + # Store to global memory + global_store = UOp(Ops.STORE, dtypes.void, (gbuf.index(gidx), local_load)) + to_uops_list([global_store]) + + @unittest.skip("Bool load is not supported yet") + def test_load_mask(self): + with Context(IGNORE_OOB=0): + glbl0 = UOp(Ops.DEFINE_GLOBAL, dtypes.int.ptr(16), (), 0) + mask = UOp(Ops.DEFINE_GLOBAL, dtypes.bool.ptr(16), (), 0) + ridx = UOp.range(20, 0) + ld0 = UOp(Ops.LOAD, dtypes.int, (glbl0.index(UOp.const(ridx, 
ridx<16&mask), ptr=True))) + to_uops_list([ld0]) + +if __name__ == "__main__": + unittest.main() From 2bb07d48241bc750b54b91938db409358c290ea6 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Wed, 31 Dec 2025 15:34:51 -0500 Subject: [PATCH 08/25] assembly/amd: move Reg out of the psuedocode (#13934) * assembly/amd: move Reg out of the psuedocode * remove extra * fix pcode tests * simpler pcode * simpler * simpler * cleaner * fix mypy --- extra/assembly/amd/autogen/cdna/gen_pcode.py | 16912 ++-------------- extra/assembly/amd/autogen/rdna3/gen_pcode.py | 14866 ++------------ extra/assembly/amd/autogen/rdna4/gen_pcode.py | 14591 ++----------- extra/assembly/amd/emu.py | 83 +- extra/assembly/amd/pdf.py | 89 +- extra/assembly/amd/test/test_pcode.py | 39 +- 6 files changed, 5965 insertions(+), 40615 deletions(-) diff --git a/extra/assembly/amd/autogen/cdna/gen_pcode.py b/extra/assembly/amd/autogen/cdna/gen_pcode.py index 43e279b68e..d6d79c1a84 100644 --- a/extra/assembly/amd/autogen/cdna/gen_pcode.py +++ b/extra/assembly/amd/autogen/cdna/gen_pcode.py @@ -5,891 +5,309 @@ from extra.assembly.amd.autogen.cdna.enum import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3POp, VOPCOp, VOP3AOp, VOP3BOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b32 = S0.b32 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b64 = S0.b64 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, 
PC=None): D0.u32 = ~S0.u32 SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~S0.u64; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~S0.u64 SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # declare i : 6'U; - # for i in 6'0U : 6'31U do - # tmp[i] = S0.u32[i & 6'60U +: 6'4U] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp[i] = S0.u32[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # declare i : 6'U; - # for i in 6'0U : 6'63U do - # tmp[i] = S0.u64[i & 6'60U +: 6'4U] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp[i] = S0.u64[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[63 : 0] = S0.u64[0 : 63] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[63 : 0] = S0.u64[0 : 63] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'0U ? 
1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'1U ? 
1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_FF0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no zeros are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'0U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 0: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FF0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no zeros are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'0U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 0: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FF1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FF1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FF1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 
1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from MSB - # if S0.u64[63 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[63 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.u32[31 - i] != S0.u32[31] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLBIT_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 63 do - # // Search from MSB - # if S0.u64[63 - i] != S0.u64[63] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLBIT_I32_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i8)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i8)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def 
_SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_GETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = PC + 4 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # jump_addr = S0.i64; - # D0.i64 = PC + 4LL; - # PC = jump_addr.i64 - S0 = Reg(s0) - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SWAPPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): jump_addr = S0.i64 D0.i64 = PC + 4 PC = Reg(jump_addr.i64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = 
True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} -def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_RFE_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 ^ 
EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ANDN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN2_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ORN2_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ORN2_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # 
Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # for i in 0 : 7 do - # tmp[i] = S0.u32[i * 4 +: 4] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(7)+1): tmp[i] = S0.u32[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # for i in 0 : 15 do - # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(15)+1): tmp[i] = S0.u64[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < 
0 ? -S0.i32 : S0.i32; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_SET_GPR_IDX_IDX(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # M0[7 : 0] = S0.u32[7 : 0].b8 - S0 = Reg(s0) - # --- compiled pseudocode --- +def _SOP1Op_S_SET_GPR_IDX_IDX(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): M0[7 : 0] = S0.u32[7 : 0].b8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOP1Op_S_ANDN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ORN1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ORN1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ANDN1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
This instruction is - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN1_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_ANDN2_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_ANDN2_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32; - # for i in 0 : 31 do - # D0.u64[i * 2] = tmp[i]; - # D0.u64[i * 2 + 1] = tmp[i] - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32) for i in range(0, int(31)+1): D0.u64[i * 2] = tmp[i] D0.u64[i * 2 + 1] = tmp[i] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} SOP1Op_FUNCTIONS = { SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, @@ -943,747 +361,268 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_BITREPLICATE_B64_B32: _SOP1Op_S_BITREPLICATE_B64_B32, } -def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # SCC = S1.u32 > S0.u32 ? 
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 + S1.i32; - # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 + S1.i32) SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 - S1.i32; - # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 - S1.i32) SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADDC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - SCC.u32; - # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUBB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - SCC.u32) SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32; - # D0.i32 = SCC ? 
S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = SCC ? 
S0.u64 : S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, 
literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ANDN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ANDN2_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ANDN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ANDN2_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ORN2_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ORN2_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ORN2_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ORN2_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - 
result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 
'SCC': SCC} -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 >> S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 >> S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 >> S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 >> S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 * S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 * S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); - # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32; - # if D0.i32 < 0 then - # D0.i32 = -D0.i32 - # endif; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled 
pseudocode --- +def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 if D0.i32 < 0: D0.i32 = -D0.i32 SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 1) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 2) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 3) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 4) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} SOP2Op_FUNCTIONS = { SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32, @@ -1739,230 +678,77 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_PACK_HH_B32_B16: _SOP2Op_S_PACK_HH_B32_B16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC 
= Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > S1.u32) - # --- end 
pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def 
_SOPCOp_S_SETVSKIP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VSKIP = S0.u32[S1.u32[4 : 0]] - S0 = Reg(s0) - S1 = Reg(s1) - # --- compiled pseudocode --- - VSKIP = S0.u32[S1.u32[4 : 0]] - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _SOPCOp_S_SET_GPR_IDX_ON(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = - # VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. - # M0[7 : 0] = S0.u32[7 : 0].b8; - # // this is the direct content of raw S1 field - S0 = Reg(s0) - S1 = Reg(s1) - SRC0 = Reg(src0_idx) - VDST = Reg(vdst_idx) - # --- compiled pseudocode --- - specified in the SRC0 operand. The raw bits of the SRC1 field are read and used to set the enable bits. S1[0] = - VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and S1[3] = VDST_REL. - M0[7 : 0] = S0.u32[7 : 0].b8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 == S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 != S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, @@ -1981,209 +767,83 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_BITCMP1_B32: _SOPCOp_S_BITCMP1_B32, SOPCOp.S_BITCMP0_B64: _SOPCOp_S_BITCMP0_B64, SOPCOp.S_BITCMP1_B64: _SOPCOp_S_BITCMP1_B64, - SOPCOp.S_SETVSKIP: _SOPCOp_S_SETVSKIP, - SOPCOp.S_SET_GPR_IDX_ON: _SOPCOp_S_SET_GPR_IDX_ON, SOPCOp.S_CMP_EQ_U64: _SOPCOp_S_CMP_EQ_U64, SOPCOp.S_CMP_LG_U64: _SOPCOp_S_CMP_LG_U64, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.i32 = 32'I(signext(S0.i16)) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 != 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= 32'I(signext(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= (signext(S1.i16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 != 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= 32'U(S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMPK_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= (S1.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); - # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOPKOp_S_ADDK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) D0.i32 = D0.i32 + (signext(S0.i16)) SCC = Reg(((tmp[31] == S0.i16[15]) and (tmp[31] != D0.i32[31]))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = D0.i32 * (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def 
_SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL; - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - D0 = Reg(d0) +def _SOPKOp_S_CALL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- D0.i64 = PC + 4 PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, @@ -2205,257 +865,118 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for i in 0U : SIMM16.u16[3 : 0].u32 do - # endfor +def _SOPPOp_S_NOP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): pass - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; +def _SOPPOp_S_BRANCH(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 1 then jump to a constant offset 
relative to the current PC. - # if VCCZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 0 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // PC passed into trap handler points to S_TRAP itself, - # PC = TBA.i64; - # // trap base address - PC = 
Reg(pc) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result +def _SOPPOp_S_TRAP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_SYS.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGUSER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_USER.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_SET_GPR_IDX_MODE(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Get Doorbell ID 10 - Returns doorbell into EXEC, with the doorbell physical address in bits - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result +def _SOPPOp_S_SET_GPR_IDX_MODE(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, @@ -2474,28 +995,11 @@ SOPPOp_FUNCTIONS = { SOPPOp.S_SET_GPR_IDX_MODE: _SOPPOp_S_SET_GPR_IDX_MODE, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'I; - # if EXEC == 0x0LL then - # lane = 0; - # // Force lane 0 if all lanes are disabled - # else - # lane = s_ff1_i32_b64(EXEC); - # // Lowest active lane - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if EXEC == 0x0: @@ -2503,877 +1007,343 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = s_ff1_i32_b64(EXEC) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_RPI_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_FLR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): 
D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def 
_VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- 
compiled pseudocode --- +def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 
1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc 
& 1} - return result + return {'D0': D0} -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FFBH_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FFBL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FFBH_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, 
literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + 
return {'D0': D0} -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled 
pseudocode ---
+def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))):
         D0.f16 = S0.f16
     else:
         D0.f16 = mantissa(S0.f16)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then
-    # D0.i16 = 16'0
-    # else
-    # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1)
-    # endif
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))):
         D0.i16 = 0
     else:
         D0.i16 = (exponent(S0.f16) - 15 + 1)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = trunc(S0.f16);
-    # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then
-    # D0.f16 += -16'1.0
-    # endif
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = trunc(S0.f16)
     if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)):
         D0.f16 += -1.0
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = trunc(S0.f16);
-    # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then
-    # D0.f16 += 16'1.0
-    # endif
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = trunc(S0.f16)
     if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)):
         D0.f16 += 1.0
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = trunc(S0.f16)
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = trunc(S0.f16)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = floor(S0.f16 + 16'0.5);
-    # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then
-    # D0.f16 -= 16'1.0
-    # endif
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = floor(S0.f16 + 0.5)
     if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)):
         D0.f16 -= 1.0
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = S0.f16 + -floor(S0.f16)
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = S0.f16 + -floor(S0.f16)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0))
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = sin(S0.f16 * F(PI * 2.0))
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0))
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = cos(S0.f16 * F(PI * 2.0))
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.i16 = f16_to_snorm(S0.f16)
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.i16 = f16_to_snorm(S0.f16)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u16 = f16_to_unorm(S0.f16)
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u16 = f16_to_unorm(S0.f16)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # tmp = 16'0;
-    # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16);
-    # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16);
-    # D0.b16 = tmp.b16
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    tmp = Reg(0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     tmp = Reg(0)
     tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16)
     tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16)
     D0.b16 = tmp.b16
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # tmp = D0.b32;
-    # D0.b32 = S0.b32;
-    # S0.b32 = tmp
-    S0 = Reg(s0)
-    D0 = Reg(d0)
-    tmp = Reg(0)
-    # --- compiled pseudocode ---
+def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     tmp = Reg(D0.b32)
     D0.b32 = S0.b32
     S0.b32 = tmp
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # if SDWA_SRC0_SEL == BYTE1.b3 then
-    # D0.f32 =
fp8_to_f32(S0[15 : 8].fp8) - # elsif SDWA_SRC0_SEL == BYTE2.b3 then - # D0.f32 = fp8_to_f32(S0[23 : 16].fp8) - # elsif SDWA_SRC0_SEL == BYTE3.b3 then - # D0.f32 = fp8_to_f32(S0[31 : 24].fp8) - # else - # // BYTE0 implied - # D0.f32 = fp8_to_f32(S0[7 : 0].fp8) - # endif - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if SDWA_SRC0_SEL == BYTE1.b3: @@ -3384,23 +1354,9 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = fp8_to_f32(S0[31 : 24].fp8) else: D0.f32 = fp8_to_f32(S0[7 : 0].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SDWA_SRC0_SEL == BYTE1.b3 then - # D0.f32 = bf8_to_f32(S0[15 : 8].bf8) - # elsif SDWA_SRC0_SEL == BYTE2.b3 then - # D0.f32 = bf8_to_f32(S0[23 : 16].bf8) - # elsif SDWA_SRC0_SEL == BYTE3.b3 then - # D0.f32 = bf8_to_f32(S0[31 : 24].bf8) - # else - # // BYTE0 implied - # D0.f32 = bf8_to_f32(S0[7 : 0].bf8) - # endif - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if SDWA_SRC0_SEL == BYTE1.b3: @@ -3411,93 +1367,44 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = bf8_to_f32(S0[31 : 24].bf8) else: D0.f32 = bf8_to_f32(S0[7 : 0].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? S0[31 : 16] : S0[15 : 0]; - # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); - # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) - tmp = Reg(0) SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0]))) D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = SDWA_SRC0_SEL[1 : 0] == WORD1.b2 ? 
S0[31 : 16] : S0[15 : 0]; - # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); - # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - S0 = Reg(s0) - D0 = Reg(d0) +def _VOP1Op_V_CVT_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) - tmp = Reg(0) SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((S0[31 : 16]) if (SDWA_SRC0_SEL[1 : 0] == WORD1.b2) else (S0[15 : 0]))) D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_PERMLANE16_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for pass in 0 : 1 do - # for lane in 0 : 15 do - # tmp = VGPR[pass * 32 + lane][SRC0.u32]; - # endfor - # endfor - tmp = Reg(0) +def _VOP1Op_V_PERMLANE16_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- - for pass in range(0, int(1)+1): + for pass_ in range(0, int(1)+1): for lane in range(0, int(15)+1): - tmp = Reg(VGPR[pass * 32 + lane][SRC0.u32]) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + tmp = Reg(VGPR[pass_ * 32 + lane][SRC0.u32]) + return {} -def _VOP1Op_V_PERMLANE32_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for lane in 0 : 31 do - # tmp = VGPR[lane][SRC0.u32]; - # endfor - tmp = Reg(0) +def _VOP1Op_V_PERMLANE32_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- for lane in range(0, int(31)+1): tmp = Reg(VGPR[lane][SRC0.u32]) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP1Op_V_CVT_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 32'F({ S0.b16, 16'0U }) - # V_CMPX_{COMPF}_F16 16-bit float compare. Also writes EXEC. 0x30 to 0x3F - # V_CMPX_{COMPF}_F32 32-bit float compare. Also writes EXEC. 0x50 to 0x5F - # V_CMPSX_{COMPF}_F64 64-bit float compare. Also writes EXEC. 0x70 to 0x7F - # V_CMPX_{COMPI}_I16 16-bit unsigned integer compare. Also writes EXEC. 0xB0 - 0xB7 - # V_CMPX_{COMPI}_U16 16-bit unsigned integer compare. Also writes EXEC. 0xB8 - 0xBF - # V_CMPX_{COMPI}_I32 32-bit unsigned integer compare. Also writes EXEC. 0xD0 - 0xD7 - # V_CMPX_{COMPI}_U32 32-bit unsigned integer compare. Also writes EXEC. 0xD8 - 0xDF - # V_CMPX_{COMPI}_I64 64-bit unsigned integer compare. Also writes EXEC. 0xF0 - 0xF7 - # V_CMPX_{COMPI}_U64 64-bit unsigned integer compare. Also writes EXEC. 0xF8 - 0xFF - S0 = Reg(s0) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = F(_pack(S0.b16, 0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} VOP1Op_FUNCTIONS = { VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, @@ -3582,140 +1489,47 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_F32_BF16: _VOP1Op_V_CVT_F32_BF16, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, D0.f64) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, D0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) 
- S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S1.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S0.f32 - # else - # D0.f32 = S0.f32 < S1.f32 ? S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -3730,32 +1544,9 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S1.f32 - # elsif WAVE_MODE.IEEE then - # D0.f32 = S0.f32 >= S1.f32 ? S0.f32 : S1.f32 - # else - # D0.f32 = S0.f32 > S1.f32 ? 
S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -3772,452 +1563,161 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32)) else: D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32))
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S1.u32 >> S0[4 : 0].u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S1.u32 >> S0[4 : 0].u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.i32 = (S1.i32 >> S0[4 : 0].u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.i32 = (S1.i32 >> S0[4 : 0].u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S1.u32 << S0[4 : 0].u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S1.u32 << S0[4 : 0].u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S0.u32 & S1.u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP2Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S0.u32 & S1.u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S0.u32 | S1.u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP2Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S0.u32 | S1.u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S0.u32 ^ S1.u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S0.u32 ^ S1.u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
+def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal,
VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32; - # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32) VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADDC_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + D0.f16; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.f16 * S1.f16 + D0.f16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MADMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * SIMM16.f16 + S1.f16; - S0 = Reg(s0) - S1 = Reg(s1) - tmp = Reg(0) +def _VOP2Op_V_MADMK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- tmp = Reg(S0.f16 * 
SIMM16.f16 + S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_MADAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + SIMM16.f16; - S0 = Reg(s0) - S1 = Reg(s1) - tmp = Reg(0) +def _VOP2Op_V_MADAK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- tmp = Reg(S0.f16 * S1.f16 + SIMM16.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S1.u16 - S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S1.u16 - S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S1.f16 - # elsif WAVE_MODE.IEEE then - # D0.f16 = S0.f16 >= S1.f16 ? S0.f16 : S1.f16 - # else - # D0.f16 = S0.f16 > S1.f16 ? S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -4234,30 +1734,9 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16)) else: D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S1.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S0.f16 - # else - # D0.f16 = S0.f16 < S1.f16 ? S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -4272,172 +1751,64 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? 
S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - 
# --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT2C_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); - # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT2C_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); - # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); - # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); - # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT4C_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); - # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); - # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); - # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); - # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); - # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); - # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); - # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = 
Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT8C_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) @@ -4448,62 +1819,27 @@ def _VOP2Op_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); - # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP2Op_V_DOT2C_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16) tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP2Op_FUNCTIONS = { VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, @@ -4570,448 +1906,198 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_DOT2C_F32_BF16: _VOP2Op_V_DOT2C_F32_BF16, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + 
S2[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 
0].u16 >> S0.u32[3 : 0].u32);
-  # D0.b32 = tmp.b32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32)
   tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32)
   D0.b32 = tmp.b32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32);
-  # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32);
-  # D0.b32 = tmp.b32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32)
   tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32)
   D0.b32 = tmp.b32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16;
-  # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16))
   tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16))
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16;
-  # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16))
   tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16))
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16;
-  # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16
   tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16;
-  # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16
   tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16;
-  # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16
   tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16;
-  # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16))
   tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16))
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16;
-  # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16))
   tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16))
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16);
-  # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16);
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)
   tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16;
-  # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16
   tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16;
-  # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16
   tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16);
-  # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16);
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16)
   tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16)
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16);
-  # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16);
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16)
   tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16)
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S2.f32;
-  # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16);
-  # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16);
-  # D0.f32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S2.f32)
   tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16)
   tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16)
   D0.f32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_DOT2_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S2.i32;
-  # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16);
-  # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16);
-  # D0.i32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3POp_V_DOT2_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S2.i32)
   tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16)
   tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16)
   D0.i32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_DOT2_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S2.u32;
-  # tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16);
-  # tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16);
-  # D0.u32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3POp_V_DOT2_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S2.u32)
   tmp += u16_to_u32(S0[15 : 0].u16) * u16_to_u32(S1[15 : 0].u16)
   tmp += u16_to_u32(S0[31 : 16].u16) * u16_to_u32(S1[31 : 16].u16)
   D0.u32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_DOT4_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S2.i32;
-  # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8);
-  # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8);
-  # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8);
-  # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8);
-  # D0.i32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3POp_V_DOT4_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S2.i32)
   tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8)
   tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8)
   tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8)
   tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8)
   D0.i32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S2.u32;
-  # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8);
-  # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8);
-  # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8);
-  # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8);
-  # D0.u32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S2.u32)
   tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8)
   tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8)
   tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8)
   tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8)
   D0.u32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S2.i32;
-  # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4);
-  # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4);
-  # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4);
-  # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4);
-  # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4);
-  # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4);
-  # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4);
-  # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4);
-  # D0.i32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3POp_V_DOT8_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S2.i32)
   tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4)
   tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4)
@@ -5022,27 +2108,9 @@ def _VOP3POp_V_DOT8_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4)
   tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4)
   D0.i32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S2.u32;
-  # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4);
-  # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4);
-  # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4);
-  # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4);
-  # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4);
-  # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4);
-  # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4);
-  # tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4);
-  # D0.u32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S2.u32)
   tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4)
   tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4)
@@ -5053,135 +2121,63 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
   tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4)
   tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4)
   D0.u32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 64'B;
-  # tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32);
-  # tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32);
-  # D0.b64 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[31 : 0].f32 = fma(S0[31 : 0].f32, S1[31 : 0].f32, S2[31 : 0].f32)
   tmp[63 : 32].f32 = fma(S0[63 : 32].f32, S1[63 : 32].f32, S2[63 : 32].f32)
   D0.b64 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  result['d0_64'] = True
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 64'B;
-  # tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32;
-  # tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32;
-  # D0.b64 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[31 : 0].f32 = S0[31 : 0].f32 * S1[31 : 0].f32
   tmp[63 : 32].f32 = S0[63 : 32].f32 * S1[63 : 32].f32
   D0.b64 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  result['d0_64'] = True
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 64'B;
-  # tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32;
-  # tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32;
-  # D0.b64 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_ADD_F32(S0, S1, 
S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 0].f32 = S0[31 : 0].f32 + S1[31 : 0].f32 tmp[63 : 32].f32 = S0[63 : 32].f32 + S1[63 : 32].f32 D0.b64 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3POp_V_PK_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32]; - # tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32]; - # D0.u32[31 : 0] = tmp0.u32; - # D0.u32[63 : 32] = tmp1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp0.u32 = S0.u32[OPSEL[0].i32 * 32 + 31 : OPSEL[0].i32 * 32] tmp1.u32 = S1.u32[OPSEL[1].i32 * 32 + 31 : OPSEL[1].i32 * 32] D0.u32[31 : 0] = tmp0.u32 D0.u32[63 : 32] = tmp1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 32'F(S0[15 : 0].bf16) * 32'F(S1[15 : 0].bf16); - # tmp += 32'F(S0[31 : 16].bf16) * 32'F(S1[31 : 16].bf16); - # tmp += S2.f32; - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(F(S0[15 : 0].bf16) * F(S1[15 : 0].bf16)) tmp += F(S0[31 : 16].bf16) * F(S1[31 : 16].bf16) tmp += S2.f32 D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = 16'F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); - # tmp[15 : 0].f16 = 16'F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MINIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = F(v_minimum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)) tmp[15 : 0].f16 = F(v_minimum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = 16'F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)); - # tmp[15 : 0].f16 = 16'F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAXIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = F(v_maximum3_f16(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16)) tmp[15 : 0].f16 = F(v_maximum3_f16(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - 
result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, @@ -5219,45 +2215,7 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAXIMUM3_F16: _VOP3POp_V_PK_MAXIMUM3_F16, } -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -5271,52 +2229,9 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 
5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -5330,52 +2245,9 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -5389,52 +2261,9 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. 
- # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -5448,52 +2277,9 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -5507,52 +2293,9 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -5566,4131 +2309,773 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. 
- # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. 
- # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. 
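The I64 and U64 variants above differ only in how the same 64-bit pattern is reinterpreted, and, as the return values in this hunk show, the plain V_CMP_* handlers report only D0 (VCC) while the V_CMPX_* forms also report the updated EXEC mask. A small plain-Python illustration of the signed/unsigned difference (not the patch's Reg views):

def as_i64(raw):
  raw &= 0xFFFFFFFFFFFFFFFF
  return raw - 0x10000000000000000 if raw >> 63 else raw  # two's-complement view (S0.i64)

def as_u64(raw):
  return raw & 0xFFFFFFFFFFFFFFFF                          # unsigned view (S0.u64)

raw_a, raw_b = 0xFFFFFFFFFFFFFFFF, 0x1
assert as_i64(raw_a) < as_i64(raw_b)        # V_CMP_LT_I64: -1 < 1, lane bit set
assert not (as_u64(raw_a) < as_u64(raw_b))  # V_CMP_LT_U64: 2**64-1 < 1 is false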
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
- # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - tmp = Reg(0) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) @@ -9698,15 +3083,7 @@ def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP tmp = Reg(MEM[addr].u32) addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['exec_lane'] = (EXEC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'EXEC': EXEC} VOPCOp_FUNCTIONS = { VOPCOp.V_CMP_CLASS_F32: _VOPCOp_V_CMP_CLASS_F32, @@ -9909,45 +3286,7 @@ VOPCOp_FUNCTIONS = { VOPCOp.V_CMPX_T_U64: _VOPCOp_V_CMPX_T_U64, } -def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. 
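VOPCOp_FUNCTIONS above only maps opcodes to handlers; the interpreter loop that consumes it is outside this hunk. One plausible call pattern, assuming per-lane operand fetch happens elsewhere and that handlers mutate their Reg arguments in place (the driver and its names are illustrative, not part of the patch):

def run_vopc(op, functions, state, num_lanes=32):
  # state: dict of Reg-like objects keyed 'S0', 'S1', 'D0', 'VCC', 'EXEC' (cf. the Reg sketch earlier)
  fn = functions[op]
  active = [l for l in range(num_lanes) if state['EXEC'].u64[l]]  # snapshot EXEC before CMPX rewrites it
  for lane in active:
    # per-lane operand fetch (VGPR reads) is omitted here; the same S0/S1 are reused for every lane
    writes = fn(state['S0'], state['S1'], None, state['D0'], 0, state['VCC'],
                lane, state['EXEC'], literal=0, VGPR=None)
    state.update(writes)  # 'D0' always; 'EXEC' as well for the CMPX forms
  return state

Returning the touched registers by name keeps the per-op handlers free of the old result-dict bookkeeping that this patch deletes.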
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -9961,51 +3300,9 @@ def _VOP3AOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -10019,51 +3316,9 @@ def _VOP3AOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. 
- # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -10077,51 +3332,9 @@ def _VOP3AOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -10135,51 +3348,9 @@ def _VOP3AOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. 
- # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -10193,51 +3364,9 @@ def _VOP3AOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into the EXEC mask and - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 
5 : 6] - # endif; - # EXEC.u64[laneId] = D0.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -10251,3939 +3380,773 @@ def _VOP3AOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. 
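The V_CMP_CLASS_* handlers above rely on isSignalNAN, isQuietNAN, exponent and sign helpers defined elsewhere in the emulator. For float32 the same ten-way classification can be reproduced directly from the raw bits; the indices follow the S1.u[0..9] table in the pseudocode comments (this standalone version is illustrative, not the patch's helpers):

import struct

def classify_f32(raw: int) -> int:
  """Return the class index (0..9) that V_CMP_CLASS_F32 tests against the S1 mask."""
  sign = (raw >> 31) & 1
  exp  = (raw >> 23) & 0xFF
  mant = raw & 0x7FFFFF
  if exp == 0xFF and mant:                   # NaN: mantissa bit 22 distinguishes quiet from signaling
    return 1 if (mant >> 22) & 1 else 0
  if exp == 0xFF:  return 2 if sign else 9   # +-infinity
  if exp > 0:      return 3 if sign else 8   # +-normal
  if mant:         return 4 if sign else 7   # +-denormal
  return 5 if sign else 6                    # +-zero

def v_cmp_class_f32(s0_raw: int, s1_mask: int) -> bool:
  return bool((s1_mask >> classify_f32(s0_raw)) & 1)

# e.g. test "is negative or positive infinity" with mask bits 2 and 9 set:
inf_bits = struct.unpack('<I', struct.pack('<f', float('inf')))[0]
assert v_cmp_class_f32(inf_bits, (1 << 2) | (1 << 9))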
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. 
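V_CMP_O and V_CMP_U above reduce to NaN tests on the two operands; in plain Python terms:

import math

def v_cmp_o(a, b): return (not math.isnan(a)) and (not math.isnan(b))  # ordered: neither input is NaN
def v_cmp_u(a, b): return math.isnan(a) or math.isnan(b)               # unordered: at least one NaN

assert v_cmp_o(1.0, 2.0) and not v_cmp_u(1.0, 2.0)
assert v_cmp_u(float('nan'), 2.0) and not v_cmp_o(float('nan'), 2.0)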
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. 
Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_TRU_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_TRU_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. 
Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_TRU_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_TRU_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. 
Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_TRU_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_TRU_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
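# Aside: what the V_CMPX_* family above adds over plain V_CMP_*: the per-lane
# result bit is written to the EXEC mask as well as to D0/VCC, so lanes that
# fail the compare drop out of execution. A simplified wave-level model using
# plain ints (hedged sketch, not emulator API; for brevity it gates on the
# incoming EXEC and leaves inactive-lane bits cleared rather than preserved):
def _cmpx_gt_i32_wave(s0_lanes, s1_lanes, exec_mask):
  d0 = new_exec = 0
  for lane, (a, b) in enumerate(zip(s0_lanes, s1_lanes)):
    if not (exec_mask >> lane) & 1: continue   # inactive lanes do not execute
    bit = 1 if a > b else 0
    d0 |= bit << lane                          # D0 = VCC in VOPC encoding
    new_exec |= bit << lane                    # EXEC.u64[laneId] gets the same bit
  return d0, new_exec

# lanes compare [3, -1, 5] > [2, 0, 5] with all three lanes active:
assert _cmpx_gt_i32_wave([3, -1, 5], [2, 0, 5], 0b111) == (0b001, 0b001)
# with lane 0 masked off it never runs, so no lane survives in EXEC:
assert _cmpx_gt_i32_wave([3, -1, 5], [2, 0, 5], 0b110) == (0b000, 0b000)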
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into the EXEC mask and to VCC or a scalar register. - # EXEC.u64[laneId] = D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. 
- # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - # addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32); - # tmp = MEM[addr].u32; - D0 = Reg(d0) - VCC = Reg(vcc) - EXEC = Reg(exec_mask) - tmp = Reg(0) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3AOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = D0.u64[laneId] = 1 addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) @@ -14191,37 +4154,13 @@ def _VOP3AOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG tmp = Reg(MEM[addr].u32) addr = CalcDsAddr(ADDR.b32, OFFSET0.b32, OFFSET1.b32) tmp = Reg(MEM[addr].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3AOp_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'I; - # if EXEC == 0x0LL then - # lane = 0; - # // Force lane 0 if all lanes are disabled - # else - # lane = s_ff1_i32_b64(EXEC); - # // Lowest active lane - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP3AOp_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if EXEC == 0x0: @@ -14229,827 +4168,314 @@ def _VOP3AOp_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, lite else: lane = s_ff1_i32_b64(EXEC) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_I32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_RPI_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_RPI_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_FLR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_FLR_I32_F32(S0, S1, S2, D0, SCC, 
VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 
'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - 
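# Aside: the V_RNDNE_F64 pseudocode above (and its F32 twin further down)
# encodes round-half-to-even: add 0.5 and floor, then back off by 1 on exact
# .5 ties whose floor is even. A small hedged sketch; _rndne is an
# illustrative name, not part of the generated emulator code.
import math

def _rndne(x: float) -> float:
  d = math.floor(x + 0.5)
  if math.floor(x) % 2 == 0 and x - math.floor(x) == 0.5: d -= 1
  return float(d)

# ties land on the even neighbour, matching Python's banker's rounding
assert [_rndne(v) for v in (2.5, 3.5, -2.5, 0.5)] == [2.0, 4.0, -2.0, 0.0]
assert all(_rndne(v) == round(v) for v in (2.5, 3.5, -2.5, 0.5, 1.25, -7.5))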
D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, 
EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FFBH_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FFBH_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FFBL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FFBL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FFBH_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FFBH_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == 
INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, 
src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = 
Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMAC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, D0.f64) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMAC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, D0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- 
compiled pseudocode --- +def _VOP3AOp_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S1.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S0.f32 - # else - # D0.f32 = S0.f32 < S1.f32 ? 
S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -15064,32 +4490,9 @@ def _VOP3AOp_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = ((S0.f32) if (S0.f32 < S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == +0.0) && (64'F(S1.f32) == -0.0)) then - # D0.f32 = S0.f32 - # elsif ((64'F(S0.f32) == -0.0) && (64'F(S1.f32) == +0.0)) then - # D0.f32 = S1.f32 - # elsif WAVE_MODE.IEEE then - # D0.f32 = S0.f32 >= S1.f32 ? S0.f32 : S1.f32 - # else - # D0.f32 = S0.f32 > S1.f32 ? S0.f32 : S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f32))): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f32))): @@ -15106,284 +4509,101 @@ def _VOP3AOp_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = ((S0.f32) if (S0.f32 >= S1.f32) else (S1.f32)) else: D0.f32 = ((S0.f32) if (S0.f32 > S1.f32) else (S1.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? 
S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- 
+def _VOP3AOp_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + D0.f16; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.f16 * S1.f16 + D0.f16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_U16(S0, S1, 
S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S1.u16 - S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S1.u16 - S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S1.f16 - # elsif WAVE_MODE.IEEE then - # D0.f16 = S0.f16 >= S1.f16 ? S0.f16 : S1.f16 - # else - # D0.f16 = S0.f16 > S1.f16 ? 
S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -15400,30 +4620,9 @@ def _VOP3AOp_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = ((S0.f16) if (S0.f16 >= S1.f16) else (S1.f16)) else: D0.f16 = ((S0.f16) if (S0.f16 > S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(64'F(S0.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif (WAVE_MODE.IEEE && isSignalNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((64'F(S0.f16) == +0.0) && (64'F(S1.f16) == -0.0)) then - # D0.f16 = S1.f16 - # elsif ((64'F(S0.f16) == -0.0) && (64'F(S1.f16) == +0.0)) then - # D0.f16 = S0.f16 - # else - # D0.f16 = S0.f16 < S1.f16 ? S0.f16 : S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(F(S0.f16))): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) elif (WAVE_MODE.IEEE and isSignalNAN(F(S1.f16))): @@ -15438,172 +4637,64 @@ def _VOP3AOp_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = ((S0.f16) if (S0.f16 < S1.f16) else (S1.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? 
S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SUBREV_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SUBREV_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT2C_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT2C_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return 
result + return {'D0': D0} -def _VOP3AOp_V_DOT2C_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16); - # tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT2C_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i16_to_i32(S0[15 : 0].i16) * i16_to_i32(S1[15 : 0].i16) tmp += i16_to_i32(S0[31 : 16].i16) * i16_to_i32(S1[31 : 16].i16) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT4C_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8); - # tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8); - # tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8); - # tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT4C_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i8_to_i32(S0[7 : 0].i8) * i8_to_i32(S1[7 : 0].i8) tmp += i8_to_i32(S0[15 : 8].i8) * i8_to_i32(S1[15 : 8].i8) tmp += i8_to_i32(S0[23 : 16].i8) * i8_to_i32(S1[23 : 16].i8) tmp += i8_to_i32(S0[31 : 24].i8) * i8_to_i32(S1[31 : 24].i8) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4); - # tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4); - # tmp += i4_to_i32(S0[11 : 8].i4) * i4_to_i32(S1[11 : 8].i4); - # tmp += i4_to_i32(S0[15 : 12].i4) * i4_to_i32(S1[15 : 12].i4); - # tmp += i4_to_i32(S0[19 : 16].i4) * i4_to_i32(S1[19 : 16].i4); - # tmp += i4_to_i32(S0[23 : 20].i4) * i4_to_i32(S1[23 : 20].i4); - # tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4); - # tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4); - # D0.i32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DOT8C_I32_I4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) tmp += i4_to_i32(S0[3 : 0].i4) * i4_to_i32(S1[3 : 0].i4) tmp += i4_to_i32(S0[7 : 4].i4) * i4_to_i32(S1[7 : 4].i4) @@ -15614,99 +4705,30 @@ def _VOP3AOp_V_DOT8C_I32_I4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp += i4_to_i32(S0[27 : 24].i4) * i4_to_i32(S1[27 : 24].i4) tmp += i4_to_i32(S0[31 : 28].i4) * i4_to_i32(S1[31 : 28].i4) D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, 
D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16); - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
- # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = 5.0F - # else - # D0.f32 = 4.0F - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = 3.0F - # else - # D0.f32 = 2.0F - # endif - # else - # if S0.f32 < 0.0F then - # D0.f32 = 1.0F - # else - # D0.f32 = 0.0F - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBEID_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = 5.0 @@ -15722,36 +4744,9 @@ def _VOP3AOp_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = 1.0 else: D0.f32 = 0.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap S coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = -S0.f32 - # else - # D0.f32 = S0.f32 - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S0.f32 - # else - # if S0.f32 < 0.0F then - # D0.f32 = S2.f32 - # else - # D0.f32 = -S2.f32 - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBESC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = -S0.f32 @@ -15764,32 +4759,9 @@ def _VOP3AOp_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S2.f32 else: D0.f32 = -S2.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap T coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = -S1.f32 - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = -S2.f32 - # else - # D0.f32 = S2.f32 - # endif - # else - # D0.f32 = -S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBETC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = -S1.f32 elif abs(S1.f32) >= abs(S0.f32): @@ -15799,234 +4771,81 @@ def _VOP3AOp_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S2.f32 else: D0.f32 = -S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = 2.0 * cubemap major axis. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
- # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = S2.f32 * 2.0F - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S1.f32 * 2.0F - # else - # D0.f32 = S0.f32 * 2.0F - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CUBEMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = S2.f32 * 2.0 elif abs(S1.f32) >= abs(S0.f32): D0.f32 = S1.f32 * 2.0 else: D0.f32 = S0.f32 * 2.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFI_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - 
return result + return {'D0': D0} -def _VOP3AOp_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); - # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); - # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); - # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LERP_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ALIGNBIT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0]) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ALIGNBYTE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0] * 8)) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - 
S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then - # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then - # D0.f32 = v_max_f32(S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then - # D0.f32 = v_max_f32(S0.f32, S2.f32) - # else - # D0.f32 = v_max_f32(S0.f32, S1.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))): D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32: @@ -16035,172 +4854,57 @@ def _VOP3AOp_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0.f32 = v_max_f32(S0.f32, S2.f32) else: D0.f32 = v_max_f32(S0.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then - # D0.i32 = v_max_i32(S1.i32, S2.i32) - # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then - # D0.i32 = v_max_i32(S0.i32, S2.i32) - # else - # D0.i32 = v_max_i32(S0.i32, S1.i32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = 
Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: D0.i32 = v_max_i32(S1.i32, S2.i32) elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: D0.i32 = v_max_i32(S0.i32, S2.i32) else: D0.i32 = v_max_i32(S0.i32, S1.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then - # D0.u32 = v_max_u32(S1.u32, S2.u32) - # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then - # D0.u32 = v_max_u32(S0.u32, S2.u32) - # else - # D0.u32 = v_max_u32(S0.u32, S1.u32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: D0.u32 = v_max_u32(S1.u32, S2.u32) elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: D0.u32 = v_max_u32(S0.u32, S2.u32) else: D0.u32 = v_max_u32(S0.u32, S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_HI_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((v_sad_u8(S0, S1, 0)) << 16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); - # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) D0.u32 = tmp - # --- end pseudocode --- 
- result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_SAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); - # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_PK_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f32) ^ sign(S2.f32)); - # if isNAN(64'F(S2.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then - # // 0/0 - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then - # // inf/inf - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then - # // x/0, or inf/y - # D0.f32 = sign_out ? -INF.f32 : +INF.f32 - # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then - # // x/inf, 0/y - # D0.f32 = sign_out ? -0.0F : 0.0F - # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then - # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32 - # elsif exponent(S1.f32) == 255 then - # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32 - # else - # D0.f32 = sign_out ? 
-abs(S0.f32) : abs(S0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f32) ^ sign(S2.f32)) if isNAN(F(S2.f32)): D0.f32 = F(cvtToQuietNAN(F(S2.f32))) @@ -16220,40 +4924,9 @@ def _VOP3AOp_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) else: D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f64) ^ sign(S2.f64)); - # if isNAN(S2.f64) then - # D0.f64 = cvtToQuietNAN(S2.f64) - # elsif isNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then - # // 0/0 - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then - # // inf/inf - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then - # // x/0, or inf/y - # D0.f64 = sign_out ? -INF : +INF - # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then - # // x/inf, 0/y - # D0.f64 = sign_out ? -0.0 : 0.0 - # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then - # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64 - # elsif exponent(S1.f64) == 2047 then - # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64 - # else - # D0.f64 = sign_out ? -abs(S0.f64) : abs(S0.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f64) ^ sign(S2.f64)) if isNAN(S2.f64): D0.f64 = cvtToQuietNAN(S2.f64) @@ -16273,90 +4946,32 @@ def _VOP3AOp_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) else: D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FMAS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, 
S2.f64) - # else - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FMAS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) else: D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += S1.u32[31 : 24] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MSAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_QSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -16364,21 +4979,9 @@ def _VOP3AOp_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal tmp[31 : 16] = (v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = 
Reg(d0) +def _VOP3AOp_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -16386,21 +4989,9 @@ def _VOP3AOp_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, litera tmp[31 : 16] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); - # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); - # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); - # tmp[31 : 0] = 32'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)); - # D0.b128 = tmp.b128 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_MQSAD_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[127 : 96] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)) @@ -16408,148 +4999,48 @@ def _VOP3AOp_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp[63 : 32] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)) tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)) D0.b128 = tmp.b128 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.f16 * S1.f16 + S2.f16; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.f16 * S1.f16 + S2.f16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_LEGACY_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u16 * S1.u16 + S2.u16; - # if OPSEL.u4[3] then - # D0 = { tmp.u16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.u16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_LEGACY_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u16 * S1.u16 + S2.u16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.u16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_LEGACY_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i16 * S1.i16 + S2.i16; - # if OPSEL.u4[3] then - # D0 = { tmp.i16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.i16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled 
pseudocode --- +def _VOP3AOp_V_MAD_LEGACY_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i16 * S1.i16 + S2.i16) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.i16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); - # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); - # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); - # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_PERM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = fma(S0.f16, S1.f16, S2.f16); - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(fma(S0.f16, S1.f16, S2.f16)) if OPSEL.u4[3]: D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # tmp = cvtToQuietNAN(64'F(S2.f16)) - # elsif isNAN(64'F(S1.f16)) then - # tmp = cvtToQuietNAN(64'F(S1.f16)) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # tmp = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # tmp = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # tmp = sign_out ? -INF : +INF - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # tmp = sign_out ? -0.0 : 0.0 - # else - # tmp = sign_out ? 
-abs(S0.f16) : abs(S0.f16) - # endif; - # if OPSEL.u4[3] then - # D0 = { tmp.f16, D0[15 : 0] } - # else - # D0 = { 16'0, tmp.f16 } - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): tmp = Reg(cvtToQuietNAN(F(S2.f16))) @@ -16569,148 +5060,51 @@ def _VOP3AOp_V_DIV_FIXUP_LEGACY_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, l D0 = Reg(_pack(tmp.f16, D0[15 : 0])) else: D0 = Reg(_pack(0, tmp.f16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PKACCUM_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # byte = S1.u32[1 : 0]; - # bit = byte.u32 * 8U; - # D0.u32[bit + 7U : bit] = 32'U(f32_to_u8(S0.f32)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_PKACCUM_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): byte = S1.u32[1 : 0] bit = byte.u32 * 8 - D0.u32[bit + 7U : bit] = (f32_to_u8(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + D0.u32[bit + 7 : bit] = (f32_to_u8(S0.f32)) + return {'D0': D0} -def _VOP3AOp_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u16) * (S1.u16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i16) * (S1.i16) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_XAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def 
_VOP3AOp_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then - # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then - # D0.f16 = v_max_f16(S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then - # D0.f16 = v_max_f16(S0.f16, S2.f16) - # else - # D0.f16 = v_max_f16(S0.f16, S1.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) elif 
v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16: @@ -16719,202 +5113,67 @@ def _VOP3AOp_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR D0.f16 = v_max_f16(S0.f16, S2.f16) else: D0.f16 = v_max_f16(S0.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then - # D0.i16 = v_max_i16(S1.i16, S2.i16) - # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then - # D0.i16 = v_max_i16(S0.i16, S2.i16) - # else - # D0.i16 = v_max_i16(S0.i16, S1.i16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: D0.i16 = v_max_i16(S1.i16, S2.i16) elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: D0.i16 = v_max_i16(S0.i16, S2.i16) else: D0.i16 = v_max_i16(S0.i16, S1.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then - # D0.u16 = v_max_u16(S1.u16, S2.u16) - # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then - # D0.u16 = v_max_u16(S0.u16, S2.u16) - # else - # D0.u16 = v_max_u16(S0.u16, S1.u16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MED3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: D0.u16 = v_max_u16(S1.u16, S2.u16) elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: D0.u16 = v_max_u16(S0.u16, S2.u16) else: D0.u16 = v_max_u16(S0.u16, S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHL_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_LSHL_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, 
src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHL_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_AND_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32 | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_OR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32 | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 + S2.f16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 + S2.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 + S2.u16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 + S2.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 * S1.i16 + S2.i16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 * S1.i16 + S2.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_FMA_F16(S0, S1, S2, D0, SCC, VCC, 
laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # D0.f16 = sign_out ? -INF.f16 : +INF.f16 - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # D0.f16 = sign_out ? -16'0.0 : 16'0.0 - # else - # D0.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_DIV_FIXUP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): D0.f16 = F(cvtToQuietNAN(F(S2.f16))) @@ -16930,67 +5189,21 @@ def _VOP3AOp_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f16 = ((-0.0) if (sign_out) else (0.0)) else: D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHL_ADD_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHL_ADD_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1.u32[2 : 0].u32) + S2.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then - # D0.f64 = 
cvtToQuietNAN(S1.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 == +0.0) && (S1.f64 == -0.0)) then - # D0.f64 = S1.f64 - # elsif ((S0.f64 == -0.0) && (S1.f64 == +0.0)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = S0.f64 < S1.f64 ? S0.f64 : S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MIN_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)): D0.f64 = cvtToQuietNAN(S0.f64) elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)): @@ -17005,33 +5218,9 @@ def _VOP3AOp_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = S0.f64 else: D0.f64 = ((S0.f64) if (S0.f64 < S1.f64) else (S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_MODE.IEEE && isSignalNAN(S0.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif (WAVE_MODE.IEEE && isSignalNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 == +0.0) && (S1.f64 == -0.0)) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 == -0.0) && (S1.f64 == +0.0)) then - # D0.f64 = S1.f64 - # elsif WAVE_MODE.IEEE then - # D0.f64 = S0.f64 >= S1.f64 ? S0.f64 : S1.f64 - # else - # D0.f64 = S0.f64 > S1.f64 ? S0.f64 : S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAX_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (WAVE_MODE.IEEE and isSignalNAN(S0.f64)): D0.f64 = cvtToQuietNAN(S0.f64) elif (WAVE_MODE.IEEE and isSignalNAN(S1.f64)): @@ -17048,155 +5237,55 @@ def _VOP3AOp_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = ((S0.f64) if (S0.f64 >= S1.f64) else (S1.f64)) else: D0.f64 = ((S0.f64) if (S0.f64 > S1.f64) else (S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * 2.0 ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LDEXP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 * S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_LO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 * S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled 
pseudocode --- +def _VOP3AOp_V_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * 2.0F ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LDEXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # lane = S1.u32[5 : 0]; - # // Lane select - # D0.b32 = VGPR[lane][SRC0.u32] - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_READLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- lane = S1.u32[5 : 0] D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32; - # for i in 0 : 31 do - # tmp += S0[i].u32; - # // count i'th bit - # endfor; - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BCNT_U32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32) for i in range(0, int(31)+1): tmp += S0[i].u32 D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_LSHRREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_ASHRREV_I64(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (S1.i64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_ASHRREV_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (S1.i64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # shift = 32'I(S1[4 : 0].u32) * 53; - # if exponent(S0.f64) > 1077 then - # shift += exponent(S0.f64) - 1077 - # endif; - # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} - # // b_1200 is the MSB of the fractional part of 2.0/PI - # // Left shift operation indicates which bits are brought - # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); - # scale = -53 - shift; - # if exponent(S0.f64) >= 1968 then - # scale += 128 - # endif; - # D0.f64 = ldexp(result, scale) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_TRIG_PREOP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): shift = (S1[4 : 0].u32) * 53 if exponent(S0.f64) > 1077: shift += exponent(S0.f64) - 1077 @@ -17205,353 +5294,129 @@ def _VOP3AOp_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal if exponent(S0.f64) >= 1968: scale += 128 D0.f64 = ldexp(result, scale) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3AOp_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PKNORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); - # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PKNORM_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f32_to_snorm(S0.f32) tmp[31 : 16].i16 = f32_to_snorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PKNORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); - # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PKNORM_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f32_to_unorm(S0.f32) tmp[31 : 16].u16 = f32_to_unorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PKRTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # prev_mode = ROUND_MODE;
-  # tmp[15 : 0].f16 = f32_to_f16(S0.f32);
-  # tmp[31 : 16].f16 = f32_to_f16(S1.f32);
-  S0 = Reg(s0)
-  S1 = Reg(s1)
+def _VOP3AOp_V_CVT_PKRTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   prev_mode = ROUND_MODE
   tmp[15 : 0].f16 = f32_to_f16(S0.f32)
   tmp[31 : 16].f16 = f32_to_f16(S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  return result
+  return {}

-def _VOP3AOp_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].u16 = u32_to_u16(S0.u32);
-  # tmp[31 : 16].u16 = u32_to_u16(S1.u32);
-  S0 = Reg(s0)
-  S1 = Reg(s1)
+def _VOP3AOp_V_CVT_PK_U16_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].u16 = u32_to_u16(S0.u32)
   tmp[31 : 16].u16 = u32_to_u16(S1.u32)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  return result
+  return {}

-def _VOP3AOp_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].i16 = i32_to_i16(S0.i32);
-  # tmp[31 : 16].i16 = i32_to_i16(S1.i32);
-  S0 = Reg(s0)
-  S1 = Reg(s1)
+def _VOP3AOp_V_CVT_PK_I16_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].i16 = i32_to_i16(S0.i32)
   tmp[31 : 16].i16 = i32_to_i16(S1.i32)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  return result
+  return {}

-def _VOP3AOp_V_CVT_PKNORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].i16 = f16_to_snorm(S0.f16);
-  # tmp[31 : 16].i16 = f16_to_snorm(S1.f16);
-  S0 = Reg(s0)
-  S1 = Reg(s1)
+def _VOP3AOp_V_CVT_PKNORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].i16 = f16_to_snorm(S0.f16)
   tmp[31 : 16].i16 = f16_to_snorm(S1.f16)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  return result
+  return {}

-def _VOP3AOp_V_CVT_PKNORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].u16 = f16_to_unorm(S0.f16);
-  # tmp[31 : 16].u16 = f16_to_unorm(S1.f16);
-  S0 = Reg(s0)
-  S1 = Reg(s1)
+def _VOP3AOp_V_CVT_PKNORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].u16 = f16_to_unorm(S0.f16)
   tmp[31 : 16].u16 = f16_to_unorm(S1.f16)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  return result
+  return {}

-def _VOP3AOp_V_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.i32 = S0.i32 + S1.i32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3AOp_V_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.i32 = S0.i32 + S1.i32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3AOp_V_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.i32 = S0.i32 - S1.i32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3AOp_V_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.i32 = S0.i32 - S1.i32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3AOp_V_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.i16 = S0.i16 + S1.i16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3AOp_V_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.i16 = S0.i16 + S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3AOp_V_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.i16 = S0.i16 - S1.i16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3AOp_V_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.i16 = S0.i16 - S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3AOp_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0[31 : 16].f16 = S1.f16;
-  # D0[15 : 0].f16 = S0.f16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3AOp_V_PACK_B32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0[31 : 16].f16 = S1.f16
   D0[15 : 0].f16 = S0.f16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3AOp_V_MUL_LEGACY_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then
-  # // DX9 rules, 0.0 * x = 0.0
-  # D0.f32 = 0.0F
-  # else
-  # D0.f32 = S0.f32 * S1.f32
-  # endif
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3AOp_V_MUL_LEGACY_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)):
     D0.f32 = 0.0
   else:
     D0.f32 = S0.f32 * S1.f32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3AOp_V_DOT2C_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = D0.f32;
-  # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16);
-  # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16);
-  # D0.f32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP3AOp_V_DOT2C_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(D0.f32)
   tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16)
   tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16)
   D0.f32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3AOp_V_BITOP3_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = 16'0U;
-  # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ?
16'U(~S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 16'U(~S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x4) != 0 ? 16'U(~S0.b16 & S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x8) != 0 ? 16'U(~S0.b16 & S1.b16 & S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x10) != 0 ? 16'U(S0.b16 & ~S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x20) != 0 ? 16'U(S0.b16 & ~S1.b16 & S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x40) != 0 ? 16'U(S0.b16 & S1.b16 & ~S2.b16) : 16'0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x80) != 0 ? 16'U(S0.b16 & S1.b16 & S2.b16) : 16'0U)); - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(0) - tmp = Reg((tmp | ((TTBL.b32 & 0x1) != 0 ? (~S0.b16 & ~S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x2) != 0 ? (~S0.b16 & ~S1.b16 & S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x4) != 0 ? (~S0.b16 & S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x8) != 0 ? (~S0.b16 & S1.b16 & S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x10) != 0 ? (S0.b16 & ~S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x20) != 0 ? (S0.b16 & ~S1.b16 & S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x40) != 0 ? (S0.b16 & S1.b16 & ~S2.b16) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x80) != 0 ? (S0.b16 & S1.b16 & S2.b16) : 0))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _VOP3AOp_V_BITOP3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # tmp = (tmp | (32'I(TTBL.b32 & 0x1) != 0 ? 32'U(~S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x2) != 0 ? 32'U(~S0.b32 & ~S1.b32 & S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x4) != 0 ? 32'U(~S0.b32 & S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x8) != 0 ? 32'U(~S0.b32 & S1.b32 & S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x10) != 0 ? 32'U(S0.b32 & ~S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x20) != 0 ? 32'U(S0.b32 & ~S1.b32 & S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x40) != 0 ? 32'U(S0.b32 & S1.b32 & ~S2.b32) : 0U)); - # tmp = (tmp | (32'I(TTBL.b32 & 0x80) != 0 ? 32'U(S0.b32 & S1.b32 & S2.b32) : 0U)); - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- - tmp = Reg(0) - tmp = Reg((tmp | ((TTBL.b32 & 0x1) != 0 ? (~S0.b32 & ~S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x2) != 0 ? (~S0.b32 & ~S1.b32 & S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x4) != 0 ? (~S0.b32 & S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x8) != 0 ? (~S0.b32 & S1.b32 & S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x10) != 0 ? (S0.b32 & ~S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x20) != 0 ? (S0.b32 & ~S1.b32 & S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x40) != 0 ? (S0.b32 & S1.b32 & ~S2.b32) : 0))) - tmp = Reg((tmp | ((TTBL.b32 & 0x80) != 0 ? 
(S0.b32 & S1.b32 & S2.b32) : 0))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp0 = f32_to_fp8_scale(S0.f32, scale.u8); - # tmp1 = f32_to_fp8_scale(S1.f32, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) tmp0 = f32_to_fp8_scale(S0.f32, scale.u8) tmp1 = f32_to_fp8_scale(S1.f32, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp0 = f32_to_bf8_scale(S0.f32, scale.u8); - # tmp1 = f32_to_bf8_scale(S1.f32, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) tmp0 = f32_to_bf8_scale(S0.f32, scale.u8) tmp1 = f32_to_bf8_scale(S1.f32, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f32_to_fp8_sr_scale(S0.f32, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f32_to_bf8_sr_scale(S0.f32, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[31 : 0].f32 = tmp0; - # D0[63 : 32].f32 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def 
_VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17559,20 +5424,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[31 : 0].f32 = tmp0 D0[63 : 32].f32 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[31 : 0].f32 = tmp0; - # D0[63 : 32].f32 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17580,75 +5435,36 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[31 : 0].f32 = tmp0 D0[63 : 32].f32 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; - # tmp = fp8_to_f32_scale(src, scale.u8); - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8 tmp = Reg(fp8_to_f32_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; - # tmp = bf8_to_f32_scale(src, scale.u8); - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8 tmp = Reg(bf8_to_f32_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp0 = f32_to_fp4_scale(S0.f32, scale.u8); - # tmp1 = f32_to_fp4_scale(S1.f32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- 
compiled pseudocode --- scale = (exponent(S2.f32)) tmp0 = f32_to_fp4_scale(S0.f32, scale.u8) tmp1 = f32_to_fp4_scale(S1.f32, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8); - # tmp1 = f32_to_fp4_sr_scale(S0[63 : 32].f32, randomVal, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) @@ -17656,20 +5472,10 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F32(s0, s1, s2, d0, scc, vcc, lane, exec_m tmp0 = f32_to_fp4_sr_scale(S0[31 : 0].f32, randomVal, scale.u8) tmp1 = f32_to_fp4_sr_scale(S0[63 : 32].f32, randomVal, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; - # D0[31 : 0].f32 = tmp0; - # D0[63 : 32].f32 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17677,160 +5483,70 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F32_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 D0[31 : 0].f32 = tmp0 D0[63 : 32].f32 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8); - # tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = f16_to_fp8_scale(S0[15 : 0].f16, scale.u8) tmp1 = f16_to_fp8_scale(S0[31 : 16].f16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8); - # tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode 
--- scale = (exponent(S1.f32)) tmp0 = f16_to_bf8_scale(S0[15 : 0].f16, scale.u8) tmp1 = f16_to_bf8_scale(S0[31 : 16].f16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f16_to_fp8_sr_scale(S0.f16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(f16_to_bf8_sr_scale(S0.f16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = bf16_to_fp8_scale(S0[15 : 0].bf16, scale.u8); - # tmp1 = bf16_to_fp8_scale(S0[31 : 16].bf16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = bf16_to_fp8_scale(S0[15 : 0].bf16, scale.u8) tmp1 = bf16_to_fp8_scale(S0[31 : 16].bf16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = bf16_to_bf8_scale(S0[15 : 0].bf16, scale.u8); - # tmp1 = bf16_to_bf8_scale(S0[31 : 16].bf16, scale.u8); - # dstword = OPSEL[3].i32 * 16; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = bf16_to_bf8_scale(S0[15 : 0].bf16, scale.u8) tmp1 = bf16_to_bf8_scale(S0[31 : 16].bf16, scale.u8) dstword = OPSEL[3].i32 * 16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = 
bf16_to_fp8_sr_scale(S0.bf16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_FP8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(bf16_to_fp8_sr_scale(S0.bf16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # tmp = bf16_to_bf8_sr_scale(S0.bf16, S1.u32, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3AOp_V_CVT_SCALEF32_SR_BF8_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): scale = (exponent(S2.f32)) tmp = Reg(bf16_to_bf8_sr_scale(S0.bf16, S1.u32, scale.u8)) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].f16 = tmp0; - # D0[31 : 16].f16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17838,20 +5554,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].f16 = tmp0 D0[31 : 16].f16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].f16 = tmp0; - # D0[31 : 16].f16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17859,94 +5565,45 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].f16 = tmp0 D0[31 : 16].f16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8; - # tmp = fp8_to_f16_scale(src, scale.u8); - # // OPSEL[3] controls destination hi/lo - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F16_FP8(S0, S1, S2, 
D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].fp8 tmp = Reg(fp8_to_f16_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8; - # tmp = bf8_to_f16_scale(src, scale.u8); - # // OPSEL[3] controls destination hi/lo - S1 = Reg(s1) - tmp = Reg(0) - laneId = lane +def _VOP3AOp_V_CVT_SCALEF32_F16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) srcbyte = OPSEL[1 : 0].i32 * 8 src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].bf8 tmp = Reg(bf8_to_f16_scale(src, scale.u8)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8); - # tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = f16_to_fp4_scale(S0[15 : 0].f16, scale.u8) tmp1 = f16_to_fp4_scale(S0[31 : 16].f16, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # tmp0 = bf16_to_fp4_scale(S0[15 : 0].bf16, scale.u8); - # tmp1 = bf16_to_fp4_scale(S0[31 : 16].bf16, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_SCALEF32_PK_FP4_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S1.f32)) tmp0 = bf16_to_fp4_scale(S0[15 : 0].bf16, scale.u8) tmp1 = bf16_to_fp4_scale(S0[31 : 16].bf16, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8); - # tmp1 = f16_to_fp4_sr_scale(S0[31 : 16].f16, randomVal, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) @@ -17954,20 +5611,9 @@ def 
_VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_F16(s0, s1, s2, d0, scc, vcc, lane, exec_m tmp0 = f16_to_fp4_sr_scale(S0[15 : 0].f16, randomVal, scale.u8) tmp1 = f16_to_fp4_sr_scale(S0[31 : 16].f16, randomVal, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # tmp0 = bf16_to_fp4_sr_scale(S0[15 : 0].bf16, randomVal, scale.u8); - # tmp1 = bf16_to_fp4_sr_scale(S0[31 : 16].bf16, randomVal, scale.u8); - # dstbyte = OPSEL[3 : 2].i32 * 8; - # // Other destination bits are preserved - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) +def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- scale = (exponent(S2.f32)) @@ -17975,20 +5621,10 @@ def _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_ tmp0 = bf16_to_fp4_sr_scale(S0[15 : 0].bf16, randomVal, scale.u8) tmp1 = bf16_to_fp4_sr_scale(S0[31 : 16].bf16, randomVal, scale.u8) dstbyte = OPSEL[3 : 2].i32 * 8 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; - # D0[15 : 0].f16 = tmp0; - # D0[31 : 16].f16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -17996,20 +5632,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 D0[15 : 0].f16 = tmp0 D0[31 : 16].f16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcbyte = OPSEL[1 : 0].i32 * 8; - # src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8; - # D0[15 : 0].bf16 = tmp0; - # D0[31 : 16].bf16 = tmp1 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -18017,446 +5643,42 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4(s0, s1, s2, d0, scc, vcc, lane, exec_mas src = VGPR[laneId][SRC0.u32][srcbyte + 7 : srcbyte].b8 D0[15 : 0].bf16 = tmp0 D0[31 : 16].bf16 = tmp1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # declare tmp : 192'B; - # for pass in 0 : 15 do - # // Note that S0 and S1 inputs are interleaved in the packed result. 
- # tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8); - # tmp[dOffset + 11 : dOffset + 6].fp6 = f32_to_fp6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - for pass in range(0, int(15)+1): - tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8) - tmp[dOffset + 11 : dOffset + 6].fp6 = f32_to_fp6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # declare tmp : 192'B; - # for pass in 0 : 15 do - # // Note that S0 and S1 inputs are interleaved in the packed result. - # tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8); - # tmp[dOffset + 11 : dOffset + 6].bf6 = f32_to_bf6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - for pass in range(0, int(15)+1): - tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_scale(S0[sOffset + 31 : sOffset].f32, scale.u8) - tmp[dOffset + 11 : dOffset + 6].bf6 = f32_to_bf6_scale(S1[sOffset + 31 : sOffset].f32, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = f32_to_fp6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = f32_to_bf6_sr_scale(S0[sOffset + 31 : sOffset].f32, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 1024'B; - # for pass in 0 : 31 do - 
# tmp[dOffset + 31 : dOffset].f32 = fp6_to_f32_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - # endfor; - # D0[1023 : 0] = tmp.b1024 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 31 : dOffset].f32 = fp6_to_f32_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - D0[1023 : 0] = tmp.b1024 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 1024'B; - # for pass in 0 : 31 do - # tmp[dOffset + 31 : dOffset].f32 = bf6_to_f32_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - # endfor; - # D0[1023 : 0] = tmp.b1024 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 31 : dOffset].f32 = bf6_to_f32_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - D0[1023 : 0] = tmp.b1024 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_scale(S0[sOffset + 15 : sOffset].f16, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_scale(S0[sOffset + 15 : sOffset].f16, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_scale(S0[sOffset + 15 : sOffset].bf16, scale.u8) - D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # 
scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = f16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = f16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].fp6 = bf16_to_fp6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = f16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].f16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S2.f32)); - # randomVal = S1.u32; - # declare tmp : 192'B; - # for pass in 0 : 31 do - # tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, - # endfor; - # D0[191 : 0] = tmp.b192 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S2.f32)) - randomVal = S1.u32 - for pass in range(0, int(31)+1): - tmp[dOffset + 5 : dOffset].bf6 = bf16_to_bf6_sr_scale(S0[sOffset + 15 : sOffset].bf16, randomVal, endfor; D0[191 : 0] = tmp.b192 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].f16 = fp6_to_f16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for 
pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].f16 = fp6_to_f16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].bf16 = fp6_to_bf16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].bf16 = fp6_to_bf16_scale(S0[sOffset + 5 : sOffset].fp6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].f16 = bf6_to_f16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].f16 = bf6_to_f16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # declare tmp : 512'B; - # for pass in 0 : 31 do - # tmp[dOffset + 15 : dOffset].bf16 = bf6_to_bf16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - # endfor; - # D0[511 : 0] = tmp.b512 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- - scale = (exponent(S1.f32)) - for pass in range(0, int(31)+1): - tmp[dOffset + 15 : dOffset].bf16 = bf6_to_bf16_scale(S0[sOffset + 5 : sOffset].bf6, scale.u8) - D0[511 : 0] = tmp.b512 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _VOP3AOp_V_ASHR_PK_I8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 16'B; - # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); - # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); - # D0[15 : 0] = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_ASHR_PK_I8_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) D0[15 : 0] = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_ASHR_PK_U8_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 16'B; - # tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32); - # tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32); - # D0[15 : 0] = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3AOp_V_ASHR_PK_U8_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, 
vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[7 : 0] = SAT8(S0.i32 >> S2[4 : 0].u32) tmp[15 : 8] = SAT8(S1.i32 >> S2[4 : 0].u32) D0[15 : 0] = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_PK_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PK_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_PK_BF16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].bf16 = f32_to_bf16(S0.f32); - # tmp[31 : 16].bf16 = f32_to_bf16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3AOp_V_CVT_PK_BF16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].bf16 = f32_to_bf16(S0.f32) tmp[31 : 16].bf16 = f32_to_bf16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].bf16 = tmp0.bf16; - # D0[31 : 16].bf16 = tmp1.bf16 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -18464,20 +5686,10 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mas src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].bf16 = tmp0.bf16 D0[31 : 16].bf16 = tmp1.bf16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # scale = 32'U(exponent(S1.f32)); - # srcword = OPSEL[0].i32 * 16; - # src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16; - # D0[15 : 0].bf16 = tmp0.bf16; - # D0[31 : 16].bf16 = tmp1.bf16 - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) - laneId = lane SRC0 = Reg(src0_idx) # --- compiled pseudocode --- scale = (exponent(S1.f32)) @@ -18485,33 +5697,15 @@ def _VOP3AOp_V_CVT_SCALEF32_PK_BF16_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mas src = VGPR[laneId][SRC0.u32][srcword + 15 : srcword].b16 D0[15 : 0].bf16 = tmp0.bf16 D0[31 : 16].bf16 = tmp1.bf16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 
32'F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MINIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = F(v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3AOp_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 32'F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3AOp_V_MAXIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = F(v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3AOp_FUNCTIONS = { VOP3AOp.V_CMP_CLASS_F32: _VOP3AOp_V_CMP_CLASS_F32, @@ -18920,8 +6114,6 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_PACK_B32_F16: _VOP3AOp_V_PACK_B32_F16, VOP3AOp.V_MUL_LEGACY_F32: _VOP3AOp_V_MUL_LEGACY_F32, VOP3AOp.V_DOT2C_F32_BF16: _VOP3AOp_V_DOT2C_F32_BF16, - VOP3AOp.V_BITOP3_B16: _VOP3AOp_V_BITOP3_B16, - VOP3AOp.V_BITOP3_B32: _VOP3AOp_V_BITOP3_B32, VOP3AOp.V_CVT_SCALEF32_PK_FP8_F32: _VOP3AOp_V_CVT_SCALEF32_PK_FP8_F32, VOP3AOp.V_CVT_SCALEF32_PK_BF8_F32: _VOP3AOp_V_CVT_SCALEF32_PK_BF8_F32, VOP3AOp.V_CVT_SCALEF32_SR_FP8_F32: _VOP3AOp_V_CVT_SCALEF32_SR_FP8_F32, @@ -18951,23 +6143,6 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_CVT_SCALEF32_SR_PK_FP4_BF16: _VOP3AOp_V_CVT_SCALEF32_SR_PK_FP4_BF16, VOP3AOp.V_CVT_SCALEF32_PK_F16_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_F16_FP4, VOP3AOp.V_CVT_SCALEF32_PK_BF16_FP4: _VOP3AOp_V_CVT_SCALEF32_PK_BF16_FP4, - VOP3AOp.V_CVT_SCALEF32_2XPK16_FP6_F32: _VOP3AOp_V_CVT_SCALEF32_2XPK16_FP6_F32, - VOP3AOp.V_CVT_SCALEF32_2XPK16_BF6_F32: _VOP3AOp_V_CVT_SCALEF32_2XPK16_BF6_F32, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_F32: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F32, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F32: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F32, - VOP3AOp.V_CVT_SCALEF32_PK32_F32_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_F32_FP6, - VOP3AOp.V_CVT_SCALEF32_PK32_F32_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_F32_BF6, - VOP3AOp.V_CVT_SCALEF32_PK32_FP6_BF16: _VOP3AOp_V_CVT_SCALEF32_PK32_FP6_BF16, - VOP3AOp.V_CVT_SCALEF32_PK32_BF6_F16: _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_F16, - VOP3AOp.V_CVT_SCALEF32_PK32_BF6_BF16: _VOP3AOp_V_CVT_SCALEF32_PK32_BF6_BF16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_F16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_F16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_FP6_BF16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_FP6_BF16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_F16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_F16, - VOP3AOp.V_CVT_SCALEF32_SR_PK32_BF6_BF16: _VOP3AOp_V_CVT_SCALEF32_SR_PK32_BF6_BF16, - VOP3AOp.V_CVT_SCALEF32_PK32_F16_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_F16_FP6, - VOP3AOp.V_CVT_SCALEF32_PK32_BF16_FP6: _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_FP6, - VOP3AOp.V_CVT_SCALEF32_PK32_F16_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_F16_BF6, - VOP3AOp.V_CVT_SCALEF32_PK32_BF16_BF6: _VOP3AOp_V_CVT_SCALEF32_PK32_BF16_BF6, VOP3AOp.V_ASHR_PK_I8_I32: _VOP3AOp_V_ASHR_PK_I8_I32, VOP3AOp.V_ASHR_PK_U8_I32: _VOP3AOp_V_ASHR_PK_U8_I32, VOP3AOp.V_CVT_PK_F16_F32: _VOP3AOp_V_CVT_PK_F16_F32, @@ -18978,162 +6153,44 @@ VOP3AOp_FUNCTIONS = { VOP3AOp.V_MAXIMUM3_F32: _VOP3AOp_V_MAXIMUM3_F32, } -def _VOP3BOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, 
exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = 64'U(S0.u32) + 64'U(S1.u32);
-  # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
-  # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32.
-  # D0.u32 = tmp.u32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  tmp = Reg(0)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3BOp_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg((S0.u32) + (S1.u32))
   VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0))
   D0.u32 = tmp.u32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  return result
+  return {'D0': D0, 'VCC': VCC}

-def _VOP3BOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S0.u32 - S1.u32;
-  # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U;
-  # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32.
-  # D0.u32 = tmp.u32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  tmp = Reg(0)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3BOp_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S0.u32 - S1.u32)
   VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0))
   D0.u32 = tmp.u32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  return result
+  return {'D0': D0, 'VCC': VCC}

-def _VOP3BOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S1.u32 - S0.u32;
-  # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U;
-  # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32.
-  # D0.u32 = tmp.u32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  tmp = Reg(0)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3BOp_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S1.u32 - S0.u32)
   VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0))
   D0.u32 = tmp.u32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  return result
+  return {'D0': D0, 'VCC': VCC}

-def _VOP3BOp_V_ADDC_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64;
-  # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U;
-  # // VCC is an UNSIGNED overflow/carry-out for V_ADDC_CO_U32.
-  # D0.u32 = tmp.u32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  tmp = Reg(0)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3BOp_V_ADDC_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId])
   VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0))
   D0.u32 = tmp.u32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  return result
+  return {'D0': D0, 'VCC': VCC}

-def _VOP3BOp_V_SUBB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32;
-  # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ?
1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_SUBB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_SUBBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUBB_CO_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3BOp_V_SUBBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # D0.f32 = NAN.f32 - # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then - # // N/D near MAX_FLOAT_F32 - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif S1.f32 == DENORM.f32 then - # D0.f32 = ldexp(S0.f32, 64) - # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then - # D0.f32 = ldexp(S0.f32, -64) - # elsif S2.f32 / S1.f32 == DENORM.f32 then - # VCC = 0x1LL; - # if S0.f32 == S2.f32 then - # // Only scale the numerator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif exponent(S2.f32) <= 23 then - # // Numerator is tiny - # D0.f32 = ldexp(S0.f32, 64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3BOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): @@ -19156,47 +6213,10 @@ def _VOP3BOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) if S1.f32 == DENORM.f32: D0.f32 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then - # D0.f64 = NAN.f64 - # elsif exponent(S2.f64) - 
exponent(S1.f64) >= 768 then - # // N/D near MAX_FLOAT_F64 - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, 128) - # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif 1.0 / S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, -128) - # elsif S2.f64 / S1.f64 == DENORM.f64 then - # VCC = 0x1LL; - # if S0.f64 == S2.f64 then - # // Only scale the numerator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif exponent(S2.f64) <= 53 then - # // Numerator is tiny - # D0.f64 = ldexp(S0.f64, 128) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3BOp_V_DIV_SCALE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): @@ -19219,45 +6239,23 @@ def _VOP3BOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f64 = ldexp(S0.f64, 128) if S1.f64 == DENORM.f64: D0.f64 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3BOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3BOp_V_MAD_U64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.u32) * (S1.u32) + (S2.u64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3BOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3BOp_V_MAD_I64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.i32) * (S1.i32) + (S2.i64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} VOP3BOp_FUNCTIONS = { VOP3BOp.V_ADD_CO_U32: _VOP3BOp_V_ADD_CO_U32, diff --git a/extra/assembly/amd/autogen/rdna3/gen_pcode.py b/extra/assembly/amd/autogen/rdna3/gen_pcode.py index 3c9c0a93f9..fa9392de7a 100644 --- a/extra/assembly/amd/autogen/rdna3/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna3/gen_pcode.py @@ -5,1298 +5,449 @@ from extra.assembly.amd.autogen.rdna3.enum import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = 
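# NOTE (illustrative sketch, editor addition): V_MAD_U64_U32 above forms the full 65-bit
# value S0*S1+S2 and splits it into a 64-bit D0 plus a 1-bit D1 overflow flag:
def v_mad_u64_u32(s0: int, s1: int, s2: int) -> tuple[int, int]:
    full = (s0 & 0xffffffff) * (s1 & 0xffffffff) + (s2 & 0xffffffffffffffff)
    return full & 0xffffffffffffffff, (full >> 64) & 1   # D0.u64, D1 carry bit

assert v_mad_u64_u32(0xffffffff, 0xffffffff, 0xffffffffffffffff) == (0xfffffffe00000000, 1)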
Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b32 = S0.b32 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b64 = S0.b64 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[63 : 0] = S0.u64[0 : 63] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[63 : 0] = S0.u64[0 : 63] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no 
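# NOTE (illustrative sketch, editor addition): S_CTZ_I32_B32 above scans from the LSB and
# returns the index of the first set bit, or -1 when the source is zero:
def s_ctz_i32_b32(s0: int) -> int:
    for i in range(32):
        if (s0 >> i) & 1:
            return i                                 # first 1 found from the LSB
    return -1                                        # no ones found

assert s_ctz_i32_b32(0b1011000) == 3 and s_ctz_i32_b32(0) == -1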
ones are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from MSB - # if S0.u64[63 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[63 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.u32[31 - i] != S0.u32[31] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 63 do - # // Search from MSB - # if S0.u64[63 - i] != S0.u64[63] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i8)) - S0 = Reg(s0) - D0 = 
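# NOTE (illustrative sketch, editor addition): S_CLZ_I32_U32 counts leading zeros from the
# MSB and S_CLS_I32 counts how far the leading run of sign-bit copies extends; both return
# -1 in the degenerate case (zero input / all bits equal), matching the loops above:
def s_clz_i32_u32(s0: int) -> int:
    for i in range(32):
        if (s0 >> (31 - i)) & 1:
            return i
    return -1

def s_cls_i32(s0: int) -> int:
    sign = (s0 >> 31) & 1
    for i in range(1, 32):
        if ((s0 >> (31 - i)) & 1) != sign:
            return i
    return -1

assert s_clz_i32_u32(1) == 31 and s_cls_i32(0xffff0000) == 16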
Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i8)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32; - # for i in 0 : 31 do - # D0.u64[i * 2] = tmp[i]; - # D0.u64[i * 2 + 1] = tmp[i] - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32) for i in range(0, int(31)+1): D0.u64[i * 2] = tmp[i] D0.u64[i * 2 + 1] = tmp[i] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < 0 ? 
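# NOTE (illustrative sketch, editor addition): S_SEXT_I32_I8 / S_SEXT_I32_I16 replicate the
# sign bit of the low 8/16 bits across the 32-bit result; two's-complement arithmetic gives
# the same mapping:
def sext(value: int, bits: int) -> int:
    value &= (1 << bits) - 1                         # keep the low `bits` bits
    return value - (1 << bits) if value & (1 << (bits - 1)) else value

assert sext(0x80, 8) == -128 and sext(0x7fff, 16) == 32767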
-S0.i32 : S0.i32; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'1U ? 
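# NOTE (illustrative sketch, editor addition): S_BCNT1 counts set bits and S_BCNT0 counts
# clear bits in the 32-bit source; SCC records whether the count is nonzero:
def s_bcnt1_i32_b32(s0: int) -> tuple[int, int]:
    count = bin(s0 & 0xffffffff).count("1")
    return count, int(count != 0)                    # D0, SCC

def s_bcnt0_i32_b32(s0: int) -> tuple[int, int]:
    count = 32 - bin(s0 & 0xffffffff).count("1")
    return count, int(count != 0)

assert s_bcnt1_i32_b32(0xF0) == (4, 1) and s_bcnt0_i32_b32(0xffffffff) == (0, 0)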
1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # for i in 0 : 7 do - # tmp[i] = S0.u32[i * 4 +: 4] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(7)+1): tmp[i] = S0.u32[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # for i in 0 : 15 do - # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(15)+1): tmp[i] = S0.u64[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # declare i : 6'U; - # for i in 6'0U : 6'31U do - # tmp[i] = S0.u32[i & 6'60U +: 6'4U] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp[i] = S0.u32[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # declare i : 6'U; - # for i in 6'0U : 6'63U do - # tmp[i] = S0.u64[i & 6'60U +: 6'4U] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp[i] = S0.u64[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, 
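# NOTE (illustrative sketch, editor addition): S_QUADMASK_B32 compresses each aligned group
# of four mask bits into one bit, and S_WQM_B32 does the reverse, lighting the whole quad
# whenever any bit of the quad is set:
def s_quadmask_b32(s0: int) -> int:
    return sum((((s0 >> (4 * i)) & 0xF) != 0) << i for i in range(8))

def s_wqm_b32(s0: int) -> int:
    return sum(0xF << (4 * i) for i in range(8) if (s0 >> (4 * i)) & 0xF)

assert s_quadmask_b32(0x0010_0001) == 0b0010_0001
assert s_wqm_b32(0x0010_0001) == 0x00F0_000F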
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~S0.u64; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~S0.u64 SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - 
S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = 
EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - 
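# NOTE (illustrative sketch, editor addition): the S_*_SAVEEXEC ops above all share one
# shape -- save the old EXEC mask into the destination, combine EXEC with the source using
# the named bitwise op, and set SCC when the new EXEC is nonzero:
def s_and_saveexec_b32(s0: int, exec_mask: int) -> tuple[int, int, int]:
    saveexec = exec_mask                             # D0 gets the original EXEC
    exec_mask = (s0 & exec_mask) & 0xffffffff        # AND variant; the others swap the op
    return saveexec, exec_mask, int(exec_mask != 0)  # D0, new EXEC, SCC

assert s_and_saveexec_b32(0b1100, 0b1010) == (0b1010, 0b1000, 1)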
# EXEC.u64 = ~(S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = 
Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation 
of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def 
_SOP1Op_S_OR_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
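# NOTE (illustrative sketch, editor addition): per the comments above, the WREXEC variants
# differ from SAVEEXEC in that the destination receives the *new* EXEC value rather than
# the saved original:
def s_and_not0_wrexec_b32(s0: int, exec_mask: int) -> tuple[int, int, int]:
    exec_mask = (~s0 & exec_mask) & 0xffffffff
    return exec_mask, exec_mask, int(exec_mask != 0)  # D0 == new EXEC, SCC

assert s_and_not0_wrexec_b32(0b0101, 0b1111) == (0b1010, 0b1010, 1)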
This instruction is - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_GETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = PC + 4 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # jump_addr = S0.i64; - # D0.i64 = PC + 4LL; - # PC = jump_addr.i64 - S0 = Reg(s0) - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SWAPPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): jump_addr = S0.i64 D0.i64 = PC + 4 PC = Reg(jump_addr.i64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} -def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
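# NOTE (illustrative sketch, editor addition): S_SWAPPC_B64 above behaves like a call
# primitive -- it returns PC+4 (the next instruction's address) in the destination and
# jumps to the address held in the source; S_SETPC_B64 can later jump back through it:
def s_swappc_b64(s0: int, pc: int) -> tuple[int, int]:
    return (pc + 4) & 0xffffffffffffffff, s0         # D0 (return address), new PC

assert s_swappc_b64(0x2000, pc=0x1000) == (0x1004, 0x2000)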
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_RFE_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, 
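# NOTE (illustrative sketch, editor addition): S_CEIL_F32 and S_FLOOR_F32 are written above
# as trunc() plus a conditional +/-1.0 adjustment; the same construction in plain Python:
import math

def s_ceil_f32(x: float) -> float:
    d = math.trunc(x)
    return d + 1.0 if x > 0.0 and x != d else float(d)

def s_floor_f32(x: float) -> float:
    d = math.trunc(x)
    return d - 1.0 if x < 0.0 and x != d else float(d)

assert s_ceil_f32(1.25) == 2.0 and s_floor_f32(-1.25) == -2.0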
laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0[31 : 16].f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_HI_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return 
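# NOTE (illustrative sketch, editor addition): S_RNDNE_F32 rounds to nearest with ties to
# even -- floor(x + 0.5), then back off by one when x sits exactly halfway above an even
# integer, as in the compiled pseudocode above:
import math

def s_rndne_f32(x: float) -> float:
    d = float(math.floor(x + 0.5))
    if math.floor(x) % 2 == 0 and x - math.floor(x) == 0.5:
        d -= 1.0                                     # tie: keep the even neighbour
    return d

assert s_rndne_f32(0.5) == 0.0 and s_rndne_f32(1.5) == 2.0 and s_rndne_f32(2.5) == 2.0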
{'D0': D0} -def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} SOP1Op_FUNCTIONS = { SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, @@ -1376,815 +527,282 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, } -def _SOP2Op_S_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # SCC = S1.u32 > S0.u32 ? 
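# NOTE (illustrative, editor addition; hypothetical caller -- the real interpreter is not
# shown in this patch): with this refactor each op returns a dict naming only the registers
# it wrote (e.g. {'D0': ..., 'SCC': ...}), so a caller can presumably apply the updates
# generically instead of decoding per-op flags like the old 'd0_64'/'vcc_lane'/'exec' keys.
# A toy model of that convention with plain ints:
def s_add_u32(s0: int, s1: int) -> dict:
    tmp = (s0 & 0xffffffff) + (s1 & 0xffffffff)
    return {"D0": tmp & 0xffffffff, "SCC": int(tmp >= 0x100000000)}

state = {"D0": 0, "SCC": 0}
state.update(s_add_u32(0xffffffff, 2))               # only the written registers change
assert state == {"D0": 1, "SCC": 1}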
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 + S1.i32; - # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 + S1.i32) SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 - S1.i32; - # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 - S1.i32) SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADDC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADDC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUBB_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - SCC.u32; - # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUBB_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - SCC.u32) SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32; - # if D0.i32 < 0 then - # D0.i32 = -D0.i32 - # endif; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 if D0.i32 < 0: D0.i32 = -D0.i32 SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 >> S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 >> S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 >> S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 >> S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 1) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 2) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 3) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 4) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32; - # D0.u32 = SCC ? 
S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, SCC, VCC, 
laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode 
--- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled 
pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); - # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 * S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 * S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def 
_SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = SCC ? S0.u64 : S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[31 : 
16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -2207,44 +825,9 @@ def _SOP2Op_S_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) 
then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -2267,127 +850,45 @@ def _SOP2Op_S_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 
S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -2410,44 +911,9 @@ def _SOP2Op_S_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if 
isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -2470,31 +936,15 @@ def _SOP2Op_S_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} SOP2Op_FUNCTIONS = { SOP2Op.S_ADD_U32: _SOP2Op_S_ADD_U32, @@ -2566,523 +1016,189 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_FMAC_F16: _SOP2Op_S_FMAC_F16, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode 
--- +def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': 
SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 == S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 != S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_F32(S0, 
S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f32)) or isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F16(S0, S1, S2, 
D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f16)) or isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 >= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 >= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 != S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 != S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 > S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 > S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def 
_SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 <= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 <= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 == S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 == S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 < S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 < S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, @@ -3133,211 +1249,114 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(SIMM16.i16)) - D0 = Reg(d0) +def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- D0.i32 = (signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Do nothing - for use by tools only - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOPKOp_S_VERSION(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.i32 = 32'I(signext(SIMM16.i16)) - # endif - D0 = Reg(d0) - SCC = Reg(scc) +def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- if SCC: D0.i32 = (signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CMPK_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) == signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) == signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) != signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) != signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) > signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) > signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) >= signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) >= signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) < 
signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) < signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = 64'I(S0.i32) <= signext(SIMM16.i16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg((S0.i32) <= signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 == (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 != 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 != (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 > (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 >= (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < 32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 < (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_CMPK_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= 
32'U(SIMM16.u16) - S0 = Reg(s0) - SCC = Reg(scc) +def _SOPKOp_S_CMPK_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- SCC = Reg(S0.u32 <= (SIMM16.u16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPKOp_S_ADDK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # D0.i32 = 32'I(64'I(D0.i32) + signext(SIMM16.i16)); - # SCC = ((tmp[31] == SIMM16.i16[15]) && (tmp[31] != D0.i32[31])); - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) +def _SOPKOp_S_ADDK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- tmp = Reg(D0.i32) D0.i32 = ((D0.i32) + signext(SIMM16.i16)) SCC = Reg(((tmp[31] == SIMM16.i16[15]) and (tmp[31] != D0.i32[31]))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(64'I(D0.i32) * signext(SIMM16.i16)) - D0 = Reg(d0) +def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- D0.i32 = ((D0.i32) * signext(SIMM16.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL; - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - D0 = Reg(d0) +def _SOPKOp_S_CALL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- D0.i64 = PC + 4 PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, @@ -3360,259 +1379,118 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for i in 0U : SIMM16.u16[3 : 0].u32 do - # endfor +def _SOPPOp_S_NOP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): pass - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # instruction may be omitted. 
For wave64 the compiler may not know the status of the EXEC mask and hence - # // 1 cycle delay here - # // 2 cycles delay here - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result +def _SOPPOp_S_DELAY_ALU(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // PC passed into trap handler points to S_TRAP itself, - # PC = TBA.i64; - # // trap base address - PC = Reg(pc) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result +def _SOPPOp_S_TRAP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {'PC': PC} -def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; +def _SOPPOp_S_BRANCH(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 1 then jump to a constant offset relative to the current PC. 
- # if VCCZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 0 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_SYS.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif 
+def _SOPPOp_S_CBRANCH_CDBGSYS(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_SYS.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGUSER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if WAVE_STATUS.COND_DBG_USER.u32 != 0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGUSER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if WAVE_STATUS.COND_DBG_USER.u32 != 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS || WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_OR_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS or WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (WAVE_STATUS.COND_DBG_SYS && WAVE_STATUS.COND_DBG_USER) then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif +def _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if (WAVE_STATUS.COND_DBG_SYS and WAVE_STATUS.COND_DBG_USER): PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, @@ -3631,40 +1509,11 @@ SOPPOp_FUNCTIONS = { SOPPOp.S_CBRANCH_CDBGSYS_AND_USER: _SOPPOp_S_CBRANCH_CDBGSYS_AND_USER, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -3679,914 +1528,363 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, 
lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def 
_VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, 
PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end 
pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # 
D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, 
VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 
1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b32; - # D0.b32 = S0.b32; - # S0.b32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b32) D0.b32 = S0.b32 S0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b16; - # D0.b16 = S0.b16; - # S0.b16 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b16) D0.b16 = S0.b16 S0.b16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, 
vdst_idx=0, pc=0):
-  # D0.u16 = ~S0.u16
-  S0 = Reg(s0)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP1Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u16 = ~S0.u16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.i32 = 32'I(signext(S0.i16))
-  S0 = Reg(s0)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP1Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.i32 = (signext(S0.i16))
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0 = { 16'0, S0.u16 }
-  S0 = Reg(s0)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP1Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0 = Reg(_pack(0, S0.u16))
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {}
 
 VOP1Op_FUNCTIONS = {
   VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32,
@@ -4669,199 +1967,64 @@ VOP1Op_FUNCTIONS = {
   VOP1Op.V_CVT_U32_U16: _VOP1Op_V_CVT_U32_U16,
 }
 
-def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u32 = VCC.u64[laneId] ? S1.u32 : S0.u32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32))
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  return result
+  return {'D0': D0}
 
-def _VOP2Op_V_DOT2ACC_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = D0.f32;
-  # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16);
-  # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16);
-  # D0.f32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  tmp = Reg(0)
-  # --- compiled pseudocode ---
+def _VOP2Op_V_DOT2ACC_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(D0.f32)
   tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16)
   tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16)
   D0.f32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f32 = S0.f32 + S1.f32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f32 = S0.f32 + S1.f32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f32 = S0.f32 - S1.f32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC,
laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return 
{'D0': D0} -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -4884,44 +2047,9 @@ def _VOP2Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = 
Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -4944,387 +2072,139 @@ def _VOP2Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, 
laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': 
D0} -def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def 
_VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -5347,44 +2227,9 @@ def _VOP2Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -5407,33 +2252,16 @@ def _VOP2Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return 
{'D0': D0}
 
-def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16))
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f16 = S0.f16 * F(2.0 ** (S1.i16))
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16);
-  # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16)
   D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
 VOP2Op_FUNCTIONS = {
   VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32,
@@ -5484,1937 +2312,375 @@ VOP2Op_FUNCTIONS = {
   VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16,
 }
 
-def _VOP3Op_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
-  # D0.u64[laneId] = 1'0U;
-  # // D0 = VCC in VOPC encoding.
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = 0
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}
 
-def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.f16 < S1.f16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.f16 < S1.f16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}
 
-def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.f16 == S1.f16;
-  # // D0 = VCC in VOPC encoding.
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
-  # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)));
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32)))
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = !(S0.f32 >= S1.f32);
-  # // With NAN inputs this is not the same operation as <
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = not (S0.f32 >= S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = !(S0.f32 <> S1.f32);
-  # // With NAN inputs this is not the same operation as ==
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = not (S0.f32 != S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # VCC or a scalar register.
-  # D0.u64[laneId] = !(S0.f32 > S1.f32);
-  # // With NAN inputs this is not the same operation as <=
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = not (S0.f32 > S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = !(S0.f32 <= S1.f32);
-  # // With NAN inputs this is not the same operation as >
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = not (S0.f32 <= S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
-  # D0.u64[laneId] = !(S0.f32 == S1.f32);
-  # // With NAN inputs this is not the same operation as !=
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = not (S0.f32 == S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC
-  # D0.u64[laneId] = !(S0.f32 < S1.f32);
-  # // With NAN inputs this is not the same operation as >=
-  # // D0 = VCC in VOPC encoding.
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. 
- # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. 
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i64 != S1.i64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = S0.i64 >= S1.i64;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i64 >= S1.i64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
-  # D0.u64[laneId] = 1'1U;
-  # // D0 = VCC in VOPC encoding.
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = 1
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 0. Store the result into VCC or a scalar register.
-  # D0.u64[laneId] = 1'0U;
-  # // D0 = VCC in VOPC encoding.
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = 0
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.u64 < S1.u64;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u64 < S1.u64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.u64 == S1.u64;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u64 == S1.u64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = S0.u64 <= S1.u64;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u64 <= S1.u64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
-  # D0.u64[laneId] = S0.u64 > S1.u64;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u64 > S1.u64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
-  # D0.u64[laneId] = S0.u64 <> S1.u64;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u64 != S1.u64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = S0.u64 >= S1.u64;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u64 >= S1.u64
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1. Store the result into VCC or a scalar register.
-  # D0.u64[laneId] = 1'1U;
-  # // D0 = VCC in VOPC encoding.
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = 1
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar
-  # S1.u[0] value is a signaling NAN.
-  # S1.u[1] value is a quiet NAN.
-  # S1.u[2] value is negative infinity.
-  # S1.u[3] value is a negative normal value.
-  # S1.u[4] value is a negative denormal value.
-  # S1.u[5] value is negative zero.
-  # S1.u[6] value is positive zero.
-  # S1.u[7] value is a positive denormal value.
-  # S1.u[8] value is a positive normal value.
-  # S1.u[9] value is positive infinity.
-  # declare result : 1'U;
-  # if isSignalNAN(64'F(S0.f16)) then
-  # result = S1.u32[0]
-  # elsif isQuietNAN(64'F(S0.f16)) then
-  # result = S1.u32[1]
-  # elsif exponent(S0.f16) == 31 then
-  # // +-INF
-  # result = S1.u32[sign(S0.f16) ? 2 : 9]
-  # elsif exponent(S0.f16) > 0 then
-  # // +-normal value
-  # result = S1.u32[sign(S0.f16) ? 3 : 8]
-  # elsif 64'F(abs(S0.f16)) > 0.0 then
-  # // +-denormal value
-  # result = S1.u32[sign(S0.f16) ? 4 : 7]
-  # else
-  # // +-0.0
-  # result = S1.u32[sign(S0.f16) ? 5 : 6]
-  # endif;
-  # D0.u64[laneId] = result;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   if isSignalNAN(F(S0.f16)):
     result = S1.u32[0]
   elif isQuietNAN(F(S0.f16)):
@@ -7428,54 +2694,9 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   else:
     result = S1.u32[((5) if (sign(S0.f16)) else (6))]
   D0.u64[laneId] = result
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar
-  # S1.u[0] value is a signaling NAN.
-  # S1.u[1] value is a quiet NAN.
-  # S1.u[2] value is negative infinity.
-  # S1.u[3] value is a negative normal value.
-  # S1.u[4] value is a negative denormal value.
-  # S1.u[5] value is negative zero.
-  # S1.u[6] value is positive zero.
-  # S1.u[7] value is a positive denormal value.
-  # S1.u[8] value is a positive normal value.
-  # S1.u[9] value is positive infinity.
-  # declare result : 1'U;
-  # if isSignalNAN(64'F(S0.f32)) then
-  # result = S1.u32[0]
-  # elsif isQuietNAN(64'F(S0.f32)) then
-  # result = S1.u32[1]
-  # elsif exponent(S0.f32) == 255 then
-  # // +-INF
-  # result = S1.u32[sign(S0.f32) ? 2 : 9]
-  # elsif exponent(S0.f32) > 0 then
-  # // +-normal value
-  # result = S1.u32[sign(S0.f32) ? 3 : 8]
-  # elsif 64'F(abs(S0.f32)) > 0.0 then
-  # // +-denormal value
-  # result = S1.u32[sign(S0.f32) ? 4 : 7]
-  # else
-  # // +-0.0
-  # result = S1.u32[sign(S0.f32) ? 5 : 6]
-  # endif;
-  # D0.u64[laneId] = result;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   if isSignalNAN(F(S0.f32)):
     result = S1.u32[0]
   elif isQuietNAN(F(S0.f32)):
@@ -7489,54 +2710,9 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   else:
     result = S1.u32[((5) if (sign(S0.f32)) else (6))]
   D0.u64[laneId] = result
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar
-  # S1.u[0] value is a signaling NAN.
-  # S1.u[1] value is a quiet NAN.
-  # S1.u[2] value is negative infinity.
-  # S1.u[3] value is a negative normal value.
-  # S1.u[4] value is a negative denormal value.
-  # S1.u[5] value is negative zero.
-  # S1.u[6] value is positive zero.
-  # S1.u[7] value is a positive denormal value.
-  # S1.u[8] value is a positive normal value.
-  # S1.u[9] value is positive infinity.
-  # declare result : 1'U;
-  # if isSignalNAN(S0.f64) then
-  # result = S1.u32[0]
-  # elsif isQuietNAN(S0.f64) then
-  # result = S1.u32[1]
-  # elsif exponent(S0.f64) == 2047 then
-  # // +-INF
-  # result = S1.u32[sign(S0.f64) ? 2 : 9]
-  # elsif exponent(S0.f64) > 0 then
-  # // +-normal value
-  # result = S1.u32[sign(S0.f64) ? 3 : 8]
-  # elsif abs(S0.f64) > 0.0 then
-  # // +-denormal value
-  # result = S1.u32[sign(S0.f64) ? 4 : 7]
-  # else
-  # // +-0.0
-  # result = S1.u32[sign(S0.f64) ? 5 : 6]
-  # endif;
-  # D0.u64[laneId] = result;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   if isSignalNAN(S0.f64):
     result = S1.u32[0]
   elif isQuietNAN(S0.f64):
@@ -7550,1245 +2726,377 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
   else:
     result = S1.u32[((5) if (sign(S0.f64)) else (6))]
   D0.u64[laneId] = result
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = 1'0U
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = 0
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}

-def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.f16 < S1.f16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.f16 < S1.f16
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}

-def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is equal to the second input.
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || 
isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 
'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || 
isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 
'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = 
Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] 
= (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def 
_VOP3Op_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, 
EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 
- # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC
-  # EXEC.u64[laneId] = S0.i64 == S1.i64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.i64 == S1.i64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.i64 <= S1.i64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.i64 <= S1.i64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.i64 > S1.i64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.i64 > S1.i64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.i64 <> S1.i64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.i64 != S1.i64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.i64 >= S1.i64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.i64 >= S1.i64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = 1'1U
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = 1
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = 1'0U
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = 0
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.u64 < S1.u64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.u64 < S1.u64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC
-  # EXEC.u64[laneId] = S0.u64 == S1.u64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.u64 == S1.u64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.u64 <= S1.u64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.u64 <= S1.u64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.u64 > S1.u64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.u64 > S1.u64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.u64 <> S1.u64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   EXEC.u64[laneId] = S0.u64 != S1.u64
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': scc & 1}
-  result['exec_lane'] = (EXEC._val >> lane) & 1
-  return result
+  return {'EXEC': EXEC}
-def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # EXEC.u64[laneId] = S0.u64 >= S1.u64
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  EXEC = Reg(exec_mask)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, 
vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -8802,46 +3110,9 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 
5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -8855,46 +3126,9 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -8908,45 +3142,13 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP3Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, 
src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -8961,1060 +3163,403 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, 
laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 
'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if 
(isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - 
# if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, 
PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': 
D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = 
Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = 
{'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def 
_VOP3Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = { SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16) } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = _pack(SAT8(S0[31 : 16].i16), SAT8(S0[15 : 0].i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = ~S0.u16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ~S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { 16'0, S0.u16 } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(0, S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F32(S0, S1, S2, D0, 
SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif LT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -10037,44 +3582,9 @@ def 
_VOP3Op_V_MIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # else - # if isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif GT_NEG_ZERO(S0.f32, S1.f32) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f32)): D0.f32 = F(cvtToQuietNAN(F(S0.f32))) @@ -10097,279 +3607,97 @@ def _VOP3Op_V_MAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32))
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S1.u32 << S0[4 : 0].u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S1.u32 << S0[4 : 0].u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S1.u32 >> S0[4 : 0].u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S1.u32 >> S0[4 : 0].u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.i32 = (S1.i32 >> S0[4 : 0].u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.i32 = (S1.i32 >> S0[4 : 0].u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S0.u32 & S1.u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S0.u32 & S1.u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S0.u32 | S1.u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S0.u32 | S1.u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = (S0.u32 ^ S1.u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = (S0.u32 ^ S1.u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = ~(S0.u32 ^ S1.u32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = ~(S0.u32 ^ S1.u32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = S0.u32 + S1.u32
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = S0.u32 + S1.u32
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = S0.u32 - S1.u32
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = S0.u32 - S1.u32
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.u32 = S1.u32 - S0.u32
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.u32 = S1.u32 - S0.u32
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f32 = fma(S0.f32, S1.f32, D0.f32)
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f32 = fma(S0.f32, S1.f32, D0.f32)
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # prev_mode = ROUND_MODE;
-    # tmp[15 : 0].f16 = f32_to_f16(S0.f32);
-    # tmp[31 : 16].f16 = f32_to_f16(S1.f32);
-    S0 = Reg(s0)
-    S1 = Reg(s1)
+def _VOP3Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     tmp = Reg(0)
     # --- compiled pseudocode ---
     prev_mode = ROUND_MODE
     tmp[15 : 0].f16 = f32_to_f16(S0.f32)
     tmp[31 : 16].f16 = f32_to_f16(S1.f32)
-    # --- end pseudocode ---
-    result = {'d0': d0, 'scc': scc & 1}
-    return result
+    return {}

-def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = S0.f16 + S1.f16
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
     D0.f16 = S0.f16 + S1.f16
-    # --- end pseudocode ---
-    result = {'d0': D0._val, 'scc': scc & 1}
-    return result
+    return {'D0': D0}

-def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-    # D0.f16 = S0.f16 - S1.f16
-    S0 = Reg(s0)
-    S1 = Reg(s1)
-    D0 = Reg(d0)
-    # --- compiled pseudocode ---
+def _VOP3Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif GT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -10392,44 +3720,9 @@ def _VOP3Op_V_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif 
isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # else - # if isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif LT_NEG_ZERO(S0.f16, S1.f16) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(F(S0.f16)): D0.f16 = F(cvtToQuietNAN(F(S0.f16))) @@ -10452,95 +3745,28 @@ def _VOP3Op_V_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). 
- # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = 5.0F - # else - # D0.f32 = 4.0F - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = 3.0F - # else - # D0.f32 = 2.0F - # endif - # else - # if S0.f32 < 0.0F then - # D0.f32 = 1.0F - # else - # D0.f32 = 0.0F - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEID_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = 5.0 @@ -10556,36 +3782,9 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = 1.0 else: D0.f32 = 0.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap S coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = -S0.f32 - # else - # D0.f32 = S0.f32 - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S0.f32 - # else - # if S0.f32 < 0.0F then - # D0.f32 = S2.f32 - # else - # D0.f32 = -S2.f32 - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBESC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = -S0.f32 @@ -10598,32 +3797,9 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S2.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap T coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = -S1.f32 - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = -S2.f32 - # else - # D0.f32 = S2.f32 - # endif - # else - # D0.f32 = -S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBETC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = -S1.f32 elif abs(S1.f32) >= abs(S0.f32): @@ -10633,254 +3809,88 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = 2.0 * cubemap major axis. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
- # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = S2.f32 * 2.0F - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S1.f32 * 2.0F - # else - # D0.f32 = S0.f32 * 2.0F - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = S2.f32 * 2.0 elif abs(S1.f32) >= abs(S0.f32): D0.f32 = S1.f32 * 2.0 else: D0.f32 = S0.f32 * 2.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFI_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + 
return {'D0': D0} -def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); - # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); - # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); - # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_LERP_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0].u32) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBIT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0].u32) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0].u32 * 8U)) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBYTE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0].u32 * 8)) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || - # isNAN(64'F(S2.f32))) then - # D0.f32 = -MAX_FLOAT_F32 - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MULLIT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S1.f32 == -MAX_FLOAT_F32) or (F(S1.f32) == (-INF)) or isNAN(F(S1.f32)) or (S2.f32 <= 0.0) or isNAN(F(S2.f32))): D0.f32 = -MAX_FLOAT_F32 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - 
result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then - # D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then - # D0.f32 = v_max_f32(S1.f32, S2.f32) - # elsif v_max3_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then - # D0.f32 = v_max_f32(S0.f32, S2.f32) - # else - # D0.f32 = v_max_f32(S0.f32, S1.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or 
isNAN(F(S2.f32))): D0.f32 = v_min3_f32(S0.f32, S1.f32, S2.f32) elif v_max3_f32(S0.f32, S1.f32, S2.f32) == S0.f32: @@ -10889,172 +3899,57 @@ def _VOP3Op_V_MED3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f32 = v_max_f32(S0.f32, S2.f32) else: D0.f32 = v_max_f32(S0.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then - # D0.i32 = v_max_i32(S1.i32, S2.i32) - # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then - # D0.i32 = v_max_i32(S0.i32, S2.i32) - # else - # D0.i32 = v_max_i32(S0.i32, S1.i32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: D0.i32 = v_max_i32(S1.i32, S2.i32) elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: D0.i32 = v_max_i32(S0.i32, S2.i32) else: D0.i32 = v_max_i32(S0.i32, S1.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then - # D0.u32 = v_max_u32(S1.u32, S2.u32) - # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then - # D0.u32 = v_max_u32(S0.u32, S2.u32) - # else - # D0.u32 = v_max_u32(S0.u32, S1.u32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: D0.u32 = v_max_u32(S1.u32, S2.u32) elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: D0.u32 = v_max_u32(S0.u32, S2.u32) else: D0.u32 = v_max_u32(S0.u32, S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_HI_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = 
((v_sad_u8(S0, S1, 0)) << 16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); - # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); - # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_PK_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f32) ^ sign(S2.f32)); - # if isNAN(64'F(S2.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32))) - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then - # // 0/0 - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then - # // inf/inf - # D0.f32 = 32'F(0xffc00000) - # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then - # // x/0, or inf/y - # D0.f32 = sign_out ? -INF.f32 : +INF.f32 - # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then - # // x/inf, 0/y - # D0.f32 = sign_out ? -0.0F : 0.0F - # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then - # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32 - # elsif exponent(S1.f32) == 255 then - # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32 - # else - # D0.f32 = sign_out ? 
-abs(S0.f32) : abs(S0.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f32) ^ sign(S2.f32)) if isNAN(F(S2.f32)): D0.f32 = F(cvtToQuietNAN(F(S2.f32))) @@ -11074,40 +3969,9 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) else: D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f64) ^ sign(S2.f64)); - # if isNAN(S2.f64) then - # D0.f64 = cvtToQuietNAN(S2.f64) - # elsif isNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then - # // 0/0 - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then - # // inf/inf - # D0.f64 = 64'F(0xfff8000000000000LL) - # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then - # // x/0, or inf/y - # D0.f64 = sign_out ? -INF : +INF - # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then - # // x/inf, 0/y - # D0.f64 = sign_out ? -0.0 : 0.0 - # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then - # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64 - # elsif exponent(S1.f64) == 2047 then - # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64 - # else - # D0.f64 = sign_out ? -abs(S0.f64) : abs(S0.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f64) ^ sign(S2.f64)) if isNAN(S2.f64): D0.f64 = cvtToQuietNAN(S2.f64) @@ -11127,90 +3991,32 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) else: D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) 
- # else - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) else: D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += S1.u32[31 : 24] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_MSAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_QSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11218,21 +4024,9 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp[31 : 16] = (v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def 
_VOP3Op_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11240,21 +4034,9 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal tmp[31 : 16] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); - # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); - # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); - # tmp[31 : 0] = 32'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)); - # D0.b128 = tmp.b128 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_MQSAD_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[127 : 96] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)) @@ -11262,187 +4044,64 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp[63 : 32] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)) tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)) D0.b128 = tmp.b128 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 + S2.u16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 + S2.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); - # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); - # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); - # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PERM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) D0[7 : 0] = 
BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_LSHL_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = 
Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then - # D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then - # D0.f16 = v_max_f16(S1.f16, S2.f16) - # elsif v_max3_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then - # D0.f16 = v_max_f16(S0.f16, S2.f16) - # else - # D0.f16 = v_max_f16(S0.f16, S1.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): D0.f16 = v_min3_f16(S0.f16, S1.f16, S2.f16) elif v_max3_f16(S0.f16, S1.f16, S2.f16) == S0.f16: @@ -11451,94 +4110,31 @@ def _VOP3Op_V_MED3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f16 = v_max_f16(S0.f16, S2.f16) else: D0.f16 = v_max_f16(S0.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then - # D0.i16 = v_max_i16(S1.i16, S2.i16) - # elsif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16 then - # D0.i16 = v_max_i16(S0.i16, S2.i16) - # else - # D0.i16 = v_max_i16(S0.i16, S1.i16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- 
compiled pseudocode --- +def _VOP3Op_V_MED3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: D0.i16 = v_max_i16(S1.i16, S2.i16) elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: D0.i16 = v_max_i16(S0.i16, S2.i16) else: D0.i16 = v_max_i16(S0.i16, S1.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then - # D0.u16 = v_max_u16(S1.u16, S2.u16) - # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then - # D0.u16 = v_max_u16(S0.u16, S2.u16) - # else - # D0.u16 = v_max_u16(S0.u16, S1.u16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: D0.u16 = v_max_u16(S1.u16, S2.u16) elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: D0.u16 = v_max_u16(S0.u16, S2.u16) else: D0.u16 = v_max_u16(S0.u16, S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 * S1.i16 + S2.i16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 * S1.i16 + S2.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # D0.f16 = sign_out ? -INF.f16 : +INF.f16 - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # D0.f16 = sign_out ? -16'0.0 : 16'0.0 - # else - # D0.f16 = sign_out ? 
-abs(S0.f16) : abs(S0.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): D0.f16 = F(cvtToQuietNAN(F(S2.f16))) @@ -11554,576 +4150,211 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f16 = ((-0.0) if (sign_out) else (0.0)) else: D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32 | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32 | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u16) * (S1.u16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I16(S0, 
S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i16) * (S1.i16) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S1.u16) if (VCC.u64[laneId]) else (S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_f32(v_max_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_f32(v_min_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_f16(v_max_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_f16(v_min_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - 
return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f16; - # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; - # D0.f16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_F16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f16) tmp += S0[15 : 0].f16 * S1[15 : 0].f16 tmp += S0[31 : 16].f16 * S1[31 : 16].f16 D0.f16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.bf16; - # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; - # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; - # D0.bf16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_BF16_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.bf16) tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16 tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16 D0.bf16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def 
_VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_i16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_i16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_u16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_u16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? 
S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 + S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 + S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 - S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 - S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = S1.f16; - # D0[15 : 0].f16 = S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PACK_B32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = S1.f16 D0[15 : 0].f16 = S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); - # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f16_to_snorm(S0.f16) tmp[31 : 16].i16 = f16_to_snorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); - # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f16_to_unorm(S0.f16) tmp[31 : 16].u16 = f16_to_unorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc 
& 1} - return result + return {} -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * 2.0F ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32; - # for i in 0 : 31 do - # tmp += S0[i].u32; - # // count i'th bit - # endfor; - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_BCNT_U32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32) for i in range(0, int(31)+1): tmp += S0[i].u32 D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); - # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f32_to_snorm(S0.f32) tmp[31 : 16].i16 = f32_to_snorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); - # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f32_to_unorm(S0.f32) tmp[31 : 16].u16 = f32_to_unorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = u32_to_u16(S0.u32); - # tmp[31 : 16].u16 = u32_to_u16(S1.u32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = u32_to_u16(S0.u32) tmp[31 : 16].u16 = u32_to_u16(S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = i32_to_i16(S0.i32); - # tmp[31 : 16].i16 = i32_to_i16(S1.i32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = i32_to_i16(S0.i32) tmp[31 : 16].i16 = i32_to_i16(S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 + S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 + S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where -0.0 < +0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif LT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # else - # if isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif LT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_F64(S0, S1, S2, D0, SCC, VCC, laneId, 
EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(S0.f64): D0.f64 = cvtToQuietNAN(S0.f64) @@ -12146,45 +4377,9 @@ def _VOP3Op_V_MIN_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Version of comparison where +0.0 > -0.0, differs from IEEE - # if WAVE_MODE.IEEE then - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif GT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # else - # if isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif GT_NEG_ZERO(S0.f64, S1.f64) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - # endif; - # // Inequalities in the above pseudocode behave differently from IEEE - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if WAVE_MODE.IEEE: if isSignalNAN(S0.f64): D0.f64 = cvtToQuietNAN(S0.f64) @@ -12207,74 +4402,25 @@ def _VOP3Op_V_MAX_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * 2.0 ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 * S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 * S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 
32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # shift = 32'I(S1[4 : 0].u32) * 53; - # if exponent(S0.f64) > 1077 then - # shift += exponent(S0.f64) - 1077 - # endif; - # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} - # // b_1200 is the MSB of the fractional part of 2.0/PI - # // Left shift operation indicates which bits are brought - # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); - # scale = -53 - shift; - # if exponent(S0.f64) >= 1968 then - # scale += 128 - # endif; - # D0.f64 = ldexp(result, scale) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRIG_PREOP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): shift = (S1[4 : 0].u32) * 53 if exponent(S0.f64) > 1077: shift += exponent(S0.f64) - 1077 @@ -12283,92 +4429,33 @@ def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if exponent(S0.f64) >= 1968: scale += 128 D0.f64 = ldexp(result, scale) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - 
return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (S1.i64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (S1.i64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE32 then - # lane = S1.u32[4 : 0].u32; - # // Lane select for wave32 - # else - # lane = S1.u32[5 : 0].u32; - # // Lane select for wave64 - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3Op_V_READLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if WAVE32: @@ -12376,42 +4463,19 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V else: lane = S1.u32[5 : 0].u32 D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 & S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 & S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 | S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 | S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 ^ S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 ^ S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3Op_FUNCTIONS = { VOP3Op.V_CMP_F_F16: _VOP3Op_V_CMP_F_F16, @@ -12834,102 +4898,26 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_XOR_B16: _VOP3Op_V_XOR_B16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, 
_vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # D0.f32 = NAN.f32 - # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then - # // N/D near MAX_FLOAT_F32 - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif S1.f32 == DENORM.f32 then - # D0.f32 = ldexp(S0.f32, 64) - # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then - # D0.f32 = ldexp(S0.f32, -64) - # elsif S2.f32 / S1.f32 == DENORM.f32 then - # VCC = 0x1LL; - # if S0.f32 == S2.f32 then - # // Only scale the numerator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif exponent(S2.f32) <= 23 then - # // Numerator is tiny - # D0.f32 = ldexp(S0.f32, 64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): @@ -12952,47 +4940,10 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) if S1.f32 == DENORM.f32: D0.f32 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then - # D0.f64 = NAN.f64 - # elsif exponent(S2.f64) - exponent(S1.f64) >= 768 then - # // N/D near MAX_FLOAT_F64 - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, 128) - # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif 1.0 / S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, -128) - # elsif S2.f64 / S1.f64 == DENORM.f64 then - # VCC = 0x1LL; - # if S0.f64 == S2.f64 then - # // Only scale the numerator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif exponent(S2.f64) <= 53 then - # // Numerator is tiny - # D0.f64 = ldexp(S0.f64, 128) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) 
# --- compiled pseudocode --- VCC = Reg(0x0) if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): @@ -13015,105 +4966,41 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal D0.f64 = ldexp(S0.f64, 128) if S1.f64 == DENORM.f64: D0.f64 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_MAD_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_U64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.u32) * (S1.u32) + (S2.u64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_MAD_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_I64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.i32) * (S1.i32) + (S2.i64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32; - # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32) VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_ADD_CO_CI_U32: _VOP3SDOp_V_ADD_CO_CI_U32, @@ -13128,373 +5015,175 @@ VOP3SDOp_FUNCTIONS = { VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32, } -def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16 tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16 tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16 D0.b32 = tmp.b32 - 
# --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? 
S0[15 : 0].i16 : S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return 
result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); - # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; - # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16; - # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_F16(S0, 
S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = v_min_f16(S0[31 : 16].f16, S1[31 : 16].f16) tmp[15 : 0].f16 = v_min_f16(S0[15 : 0].f16, S1[15 : 0].f16) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].f16 = v_max_f16(S0[31 : 16].f16, S1[31 : 16].f16) tmp[15 : 0].f16 = v_max_f16(S0[15 : 0].f16, S1[15 : 0].f16) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); - # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); - # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8); - # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8) tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8) tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8) tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': 
D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); - # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); - # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4); - # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4); - # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4); - # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4); - # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4); - # tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4) tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4) @@ -13505,28 +5194,14 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4) tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); - # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16) tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, @@ -13554,1937 +5229,375 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT2_F32_BF16: _VOP3POp_V_DOT2_F32_BF16, } -def _VOPCOp_V_CMP_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0)
- S1 = Reg(s1)
- D0 = Reg(d0)
- VCC = Reg(vcc)
- laneId = lane
- PC = Reg(pc)
- # --- compiled pseudocode ---
+def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
D0.u64[laneId] = S0.f16 > S1.f16
- # --- end pseudocode ---
- result = {'d0': D0._val, 'scc': scc & 1}
- if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
- result['vcc_lane'] = (D0._val >> lane) & 1
- result['d0_64'] = True
- _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
- result['new_pc'] = _pc
- return result
+ return {'D0': D0}
-def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
- # D0.u64[laneId] = S0.f16 <> S1.f16;
- # // D0 = VCC in VOPC encoding.
- S0 = Reg(s0)
- S1 = Reg(s1)
- D0 = Reg(d0)
- VCC = Reg(vcc)
- laneId = lane
- PC = Reg(pc)
- # --- compiled pseudocode ---
+def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
D0.u64[laneId] = S0.f16 != S1.f16
- # --- end pseudocode ---
- result = {'d0': D0._val, 'scc': scc & 1}
- if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
- result['vcc_lane'] = (D0._val >> lane) & 1
- result['d0_64'] = True
- _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
- result['new_pc'] = _pc
- return result
+ return {'D0': D0}
-def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
- # D0.u64[laneId] = S0.f16 >= S1.f16;
- # // D0 = VCC in VOPC encoding.
- S0 = Reg(s0)
- S1 = Reg(s1)
- D0 = Reg(d0)
- VCC = Reg(vcc)
- laneId = lane
- PC = Reg(pc)
- # --- compiled pseudocode ---
+def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
D0.u64[laneId] = S0.f16 >= S1.f16
- # --- end pseudocode ---
- result = {'d0': D0._val, 'scc': scc & 1}
- if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
- result['vcc_lane'] = (D0._val >> lane) & 1
- result['d0_64'] = True
- _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
- result['new_pc'] = _pc
- return result
+ return {'D0': D0}
-def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
- # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC
- # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16)));
- # // D0 = VCC in VOPC encoding.
- S0 = Reg(s0)
- S1 = Reg(s1)
- D0 = Reg(d0)
- VCC = Reg(vcc)
- laneId = lane
- PC = Reg(pc)
- # --- compiled pseudocode ---
+def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))
- # --- end pseudocode ---
- result = {'d0': D0._val, 'scc': scc & 1}
- if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
- result['vcc_lane'] = (D0._val >> lane) & 1
- result['d0_64'] = True
- _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
- result['new_pc'] = _pc
- return result
+ return {'D0': D0}
-def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
- # VCC or a scalar register.
- # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)));
- # // D0 = VCC in VOPC encoding.
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. 
Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. 
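For reference, the compiled handlers above lean on register objects that expose typed views such as .u16/.i32 and per-lane bit assignment via .u64[laneId]. The real Reg class lives elsewhere in this emulator; the snippet below is only a minimal sketch, assuming a 64-bit backing integer, of how such views could behave (LaneBits and MiniReg are hypothetical names, not part of this patch).

class LaneBits:
    # hypothetical helper: supports reg.u64[lane] = 0/1 style bit writes
    def __init__(self, reg): self.reg = reg
    def __getitem__(self, lane): return (self.reg.val >> lane) & 1
    def __setitem__(self, lane, bit):
        self.reg.val = (self.reg.val & ~(1 << lane)) | ((1 if bit else 0) << lane)

class MiniReg:
    # hypothetical stand-in for the emulator's Reg: raw 64-bit value plus typed views
    def __init__(self, val=0): self.val = val & 0xFFFFFFFFFFFFFFFF
    @property
    def u32(self): return self.val & 0xFFFFFFFF
    @property
    def i32(self):
        v = self.val & 0xFFFFFFFF
        return v - (1 << 32) if v & 0x80000000 else v   # two's complement reinterpret
    @property
    def u64(self): return LaneBits(self)                # per-lane bit view

# usage: emulate D0.u64[laneId] = S0.i32 < S1.i32 for one lane
S0, S1, D0, laneId = MiniReg(0xFFFFFFFF), MiniReg(1), MiniReg(0), 5
D0.u64[laneId] = S0.i32 < S1.i32                        # -1 < 1, so lane 5 gets set
assert D0.val == 1 << 5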
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. 
- D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. 
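In VOPC encoding D0 is VCC, so a wavefront's compare results end up as a single 64-bit lane mask. As a rough illustration only (not code from this patch, and assuming a 64-wide wave), the per-lane handlers above could be folded over a wave like this, with lanes disabled in EXEC simply left at 0 in the sketch's result:

# hypothetical illustration: build a VCC-style 64-bit mask from per-lane u32 compares
def wave_cmp_lt_u32(src0, src1, exec_mask, wave=64):
    vcc = 0
    for lane in range(wave):
        if not (exec_mask >> lane) & 1:   # skip lanes disabled in EXEC
            continue
        if (src0[lane] & 0xFFFFFFFF) < (src1[lane] & 0xFFFFFFFF):
            vcc |= 1 << lane
    return vcc

a = list(range(64))
b = [32] * 64
assert wave_cmp_lt_u32(a, b, exec_mask=(1 << 64) - 1) == (1 << 32) - 1   # lanes 0..31 set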
- # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. 
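The signed views used below (.i16/.i32/.i64) and the deleted wrappers' new_pc computation both reduce to reinterpreting an unsigned bit pattern as two's complement. A small generic helper, shown here only as a sketch, makes that rule explicit:

def as_signed(value, bits):
    """Reinterpret an unsigned `bits`-wide integer as two's complement."""
    value &= (1 << bits) - 1
    return value - (1 << bits) if value & (1 << (bits - 1)) else value

# the same rule the deleted wrappers applied to the 64-bit PC
assert as_signed(0xFFFFFFFFFFFFFFFF, 64) == -1
# and the rule behind S0.i64 < S1.i64 differing from S0.u64 < S1.u64
assert as_signed(0x8000000000000000, 64) < 0 and 0x8000000000000000 > 0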
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. 
- # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 0. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'0U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. 
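The refactored handlers no longer build result dictionaries with 'd0', 'vcc_lane', and 'new_pc'; they simply return the register objects they touched, e.g. {'D0': D0} or {'EXEC': EXEC}. A caller can then write back whatever came out, roughly as in the hypothetical sketch below (write_back, FakeReg, and regs are illustrative names, not from this patch):

def write_back(regs, handler_result):
    # hypothetical: copy each returned register object back into emulator state
    for name, reg in handler_result.items():
        regs[name] = reg
    return regs

class FakeReg:
    def __init__(self, val=0): self.val = val

regs = {"D0": FakeReg(0), "EXEC": FakeReg((1 << 64) - 1)}
d0 = FakeReg(0b101)                         # pretend a V_CMP_* handler set lanes 0 and 2
regs = write_back(regs, {"D0": d0})
assert regs["D0"].val == 0b101 and regs["EXEC"].val == (1 << 64) - 1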
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1. Store the result into VCC or a scalar register. - # D0.u64[laneId] = 1'1U; - # // D0 = VCC in VOPC encoding. - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. 
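V_CMP_CLASS_* tests one float against a 10-bit class mask in S1, using the bit order listed above (signaling NaN, quiet NaN, -inf, -normal, -denormal, -0, +0, +denormal, +normal, +inf). A rough, self-contained sketch of that classification for f32 bit patterns follows; it is not the emulator's own isSignalNAN/exponent/sign helpers, just an illustration of the same decision chain.

def f32_class_index(bits):
    """Map an IEEE-754 binary32 bit pattern to its V_CMP_CLASS bit index (0..9)."""
    sign = (bits >> 31) & 1
    exp = (bits >> 23) & 0xFF
    mant = bits & 0x7FFFFF
    if exp == 0xFF and mant:                  # NaN: quiet bit is the top mantissa bit
        return 1 if mant & 0x400000 else 0    # 1 = quiet NaN, 0 = signaling NaN
    if exp == 0xFF:  return 2 if sign else 9  # +-inf
    if exp > 0:      return 3 if sign else 8  # +-normal
    if mant:         return 4 if sign else 7  # +-denormal
    return 5 if sign else 6                   # +-zero

def cmp_class_f32(s0_bits, s1_mask):
    return (s1_mask >> f32_class_index(s0_bits)) & 1

assert f32_class_index(0x7F800000) == 9          # +inf
assert f32_class_index(0x80000001) == 4          # negative denormal
assert cmp_class_f32(0x00000000, 1 << 6) == 1    # +0.0 matches the "+zero" bit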
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -15498,54 +5611,9 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -15559,54 +5627,9 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. 
- # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -15620,1245 +5643,377 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_F_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || 
isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 
'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || 
isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 
'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = 
Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] 
= (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def 
_VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I32(S0, S1, S2, D0, SCC, VCC, laneId, 
EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 
- # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i64 == S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 > S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <> S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 >= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_F_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'0U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_F_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, 
literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 0 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 < S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 > S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 >= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, 
vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_T_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = 1'1U - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_T_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -16872,46 +6027,9 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 
5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -16925,46 +6043,9 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -16978,10 +6059,7 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} VOPCOp_FUNCTIONS = { VOPCOp.V_CMP_F_F16: _VOPCOp_V_CMP_F_F16, diff --git a/extra/assembly/amd/autogen/rdna4/gen_pcode.py b/extra/assembly/amd/autogen/rdna4/gen_pcode.py index 988b5ccec5..c7331ddd59 100644 --- a/extra/assembly/amd/autogen/rdna4/gen_pcode.py +++ b/extra/assembly/amd/autogen/rdna4/gen_pcode.py @@ -5,1384 +5,452 @@ from extra.assembly.amd.autogen.rdna4.enum import SOP1Op, SOP2Op, SOPCOp, SOPKOp, SOPPOp, VOP1Op, VOP2Op, VOP3Op, VOP3SDOp, VOP3POp, VOPCOp from extra.assembly.amd.pcode import * -def _SOP1Op_S_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_MOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b64 = S0.b64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def 
_SOP1Op_S_MOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b32 = S0.b32 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CMOV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.b64 = S0.b64 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_CMOV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.b64 = S0.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[63 : 0] = S0.u64[0 : 63] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[63 : 0] = S0.u64[0 : 63] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CTZ_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from LSB - # if S0.u64[i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CTZ_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': 
D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLZ_I32_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if no ones are found - # for i in 0 : 63 do - # // Search from MSB - # if S0.u64[63 - i] == 1'1U then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLZ_I32_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(0, int(63)+1): if S0.u64[63 - i] == 1: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.u32[31 - i] != S0.u32[31] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(31)+1): if S0.u32[31 - i] != S0.u32[31]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CLS_I32_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = -1; - # // Set if all bits are the same - # for i in 1 : 63 do - # // Search from MSB - # if S0.u64[63 - i] != S0.u64[63] then - # tmp = i; - # endif - # endfor; - # D0.i32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_CLS_I32_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(-1) for i in range(1, int(63)+1): if S0.u64[63 - i] != S0.u64[63]: tmp = Reg(i); break D0.i32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i8)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i8)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_SEXT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) 
- # --- compiled pseudocode --- +def _SOP1Op_S_SEXT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'0U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[S0.u32[4 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[S0.u32[4 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_BITSET1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[S0.u32[5 : 0]] = 1'1U - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITSET1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[S0.u32[5 : 0]] = 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_BITREPLICATE_B64_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32; - # for i in 0 : 31 do - # D0.u64[i * 2] = tmp[i]; - # D0.u64[i * 2 + 1] = tmp[i] - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BITREPLICATE_B64_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32) for i in range(0, int(31)+1): D0.u64[i * 2] = tmp[i] D0.u64[i * 2 + 1] = tmp[i] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP1Op_S_ABS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < 0 ? 
-S0.i32 : S0.i32; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_ABS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((-S0.i32) if (S0.i32 < 0) else (S0.i32)) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT0_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'0U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT0_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 0) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 31 do - # tmp += S0.u32[i] == 1'1U ? 1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp += ((1) if (S0.u32[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_BCNT1_I32_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0; - # for i in 0 : 63 do - # tmp += S0.u64[i] == 1'1U ? 
1 : 0 - # endfor; - # D0.i32 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_BCNT1_I32_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp += ((1) if (S0.u64[i] == 1) else (0)) D0.i32 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # for i in 0 : 7 do - # tmp[i] = S0.u32[i * 4 +: 4] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(7)+1): tmp[i] = S0.u32[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_QUADMASK_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # for i in 0 : 15 do - # tmp[i] = S0.u64[i * 4 +: 4] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_QUADMASK_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(15)+1): tmp[i] = S0.u64[(i * 4) + (4) - 1 : (i * 4)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0U; - # declare i : 6'U; - # for i in 6'0U : 6'31U do - # tmp[i] = S0.u32[i & 6'60U +: 6'4U] != 0U - # endfor; - # D0.u32 = tmp; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(31)+1): tmp[i] = S0.u32[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u32 = tmp SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_WQM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 0ULL; - # declare i : 6'U; - # for i in 6'0U : 6'63U do - # tmp[i] = S0.u64[i & 6'60U +: 6'4U] != 0ULL - # endfor; - # D0.u64 = tmp; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP1Op_S_WQM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) for i in range(0, int(63)+1): tmp[i] = S0.u64[(i & 60) + (4) - 1 : (i & 60)] != 0 D0.u64 = tmp SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B32(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32; - # SCC = D0.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_NOT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~S0.u64; - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP1Op_S_NOT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~S0.u64 SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP1Op_S_AND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - 
S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, set - # SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar destination - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = 
EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NAND_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NAND on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NAND_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_NOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise NOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - 
# EXEC.u64 = ~(S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_NOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u32; - # EXEC.u32 = ~(S0.u32 ^ EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = ~(S0.u32 ^ EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_XNOR_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise XNOR on the scalar input and the EXEC mask, store the calculated result into the EXEC mask, - # set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the scalar - # saveexec = EXEC.u64; - # EXEC.u64 = ~(S0.u64 ^ EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_XNOR_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = ~(S0.u64 ^ EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = saveexec.u32 SCC = 
Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (~S0.u32 | EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (~S0.u32 | EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the EXEC mask and the negation of the scalar input, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (~S0.u64 | EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT0_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (~S0.u64 | EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation 
of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # the EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u32; - # EXEC.u32 = (S0.u32 | ~EXEC.u32); - # D0.u32 = saveexec.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_OR_NOT1_SAVEEXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u32) EXEC.u32 = (S0.u32 | ~EXEC.u32) D0.u32 = saveexec.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_OR_NOT1_SAVEEXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise OR on the scalar input and the negation of the EXEC mask, store the calculated result into the - # EXEC mask, set SCC iff the calculated result is nonzero and store the original value of the EXEC mask into the - # saveexec = EXEC.u64; - # EXEC.u64 = (S0.u64 | ~EXEC.u64); - # D0.u64 = saveexec.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - saveexec = Reg(exec_mask) - # --- compiled pseudocode --- +def 
_SOP1Op_S_OR_NOT1_SAVEEXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): saveexec = Reg(EXEC.u64) EXEC.u64 = (S0.u64 | ~EXEC.u64) D0.u64 = saveexec.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u32 = (~S0.u32 & EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (~S0.u32 & EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT0_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the EXEC mask and the negation of the scalar input, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (~S0.u64 & EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT0_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (~S0.u64 & EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. 
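
All *_SAVEEXEC_* and *_WREXEC_* handlers in this hunk share one shape: combine S0 with EXEC under a bitwise op, write the new mask to EXEC, write either the old mask (SAVEEXEC) or the new one (WREXEC) to D0, and set SCC iff any lane is still enabled. A hedged 32-bit sketch of that shape, independent of the Reg type (the NOT0/NOT1 variants additionally invert one operand before the op):

import operator

def saveexec_b32(op, s0, exec_mask, negate_result=False, wrexec=False):
    # Shared shape of the *_SAVEEXEC_B32 / *_WREXEC_B32 handlers above.
    # op is a bitwise operator; negate_result models the NAND/NOR/XNOR forms.
    new_exec = op(s0, exec_mask) & 0xFFFFFFFF
    if negate_result: new_exec = ~new_exec & 0xFFFFFFFF
    d0 = new_exec if wrexec else exec_mask   # WREXEC writes the result, SAVEEXEC the old mask
    return d0, new_exec, int(new_exec != 0)  # (D0, EXEC, SCC)

# S_AND_SAVEEXEC_B32: only lanes active in both S0 and EXEC stay enabled.
assert saveexec_b32(operator.and_, 0b1010, 0b0110) == (0b0110, 0b0010, 1)
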
This instruction is - # EXEC.u32 = (S0.u32 & ~EXEC.u32); - # D0.u32 = EXEC.u32; - # SCC = EXEC.u32 != 0U - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u32 = (S0.u32 & ~EXEC.u32) D0.u32 = EXEC.u32 SCC = Reg(EXEC.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_AND_NOT1_WREXEC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Calculate bitwise AND on the scalar input and the negation of the EXEC mask, store the calculated result into - # Unlike the SAVEEXEC series of opcodes, the value written to destination SGPRs is the result of the bitwise-op - # result. EXEC and the destination SGPRs have the same value at the end of this instruction. This instruction is - # EXEC.u64 = (S0.u64 & ~EXEC.u64); - # D0.u64 = EXEC.u64; - # SCC = EXEC.u64 != 0ULL - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- +def _SOP1Op_S_AND_NOT1_WREXEC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64 = (S0.u64 & ~EXEC.u64) D0.u64 = EXEC.u64 SCC = Reg(EXEC.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC, 'EXEC': EXEC} -def _SOP1Op_S_GETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_GETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = PC + 4 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _SOP1Op_S_SETPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SETPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SWAPPC_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # jump_addr = S0.i64; - # D0.i64 = PC + 4LL; - # PC = jump_addr.i64 - S0 = Reg(s0) - D0 = Reg(d0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_SWAPPC_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): jump_addr = S0.i64 D0.i64 = PC + 4 PC = Reg(jump_addr.i64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} -def _SOP1Op_S_RFE_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = S0.i64 - S0 = Reg(s0) - PC = Reg(pc) - # --- compiled pseudocode --- +def _SOP1Op_S_RFE_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): PC = Reg(S0.i64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOP1Op_S_SENDMSG_RTN_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_SENDMSG_RTN_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If SDST is VCC then VCCZ is undefined. - VCC = Reg(vcc) - VCCZ = Reg(1 if VCC._val == 0 else 0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result +def _SOP1Op_S_SENDMSG_RTN_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_BARRIER_SIGNAL(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if !InWorkgroup() then - # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then - # elsif barrierNumber == 0 then - # else - # BARRIER_STATE[barrierNumber & 63].signalCnt += 7'1U - # endif; - # --- compiled pseudocode --- - if not InWorkgroup(): - pass - elif ((barrierNumber == -2) and not WAVE_STATUS.PRIV): - pass - elif barrierNumber == 0: - pass - else: - BARRIER_STATE[barrierNumber & 63].signalCnt += 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOP1Op_S_SLEEP_VAR(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOP1Op_S_BARRIER_SIGNAL_ISFIRST(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if !InWorkgroup() then - # SCC = 1'0U - # elsif ((barrierNumber == -2) && !WAVE_STATUS.PRIV) then - # SCC = 1'0U - # elsif barrierNumber == 0 then - # SCC = 1'0U - # else - # // Set SCC if this is the first signaling event for this barrier. 
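
The PC handlers (S_GETPC_B64, S_SETPC_B64, S_SWAPPC_B64, S_RFE_B64) now return a PC entry instead of computing a signed new_pc themselves. The arithmetic itself is just byte addressing with a 4-byte instruction; a rough sketch, assuming the caller interprets the returned values:

def s_swappc_b64(pc: int, s0: int) -> tuple[int, int]:
    # S_SWAPPC_B64: D0 gets the return address (PC + 4), PC gets S0.
    # S_GETPC_B64 and S_SETPC_B64 are the two halves of this operation.
    return pc + 4, s0

d0, new_pc = s_swappc_b64(0x1000, 0x2000)
assert (d0, new_pc) == (0x1004, 0x2000)
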
- # SCC = BARRIER_STATE[barrierNumber & 63].signalCnt.u32 == 0U; - # BARRIER_STATE[barrierNumber & 63].signalCnt += 7'1U - # endif; - SCC = Reg(scc) - # --- compiled pseudocode --- - if not InWorkgroup(): - SCC = Reg(0) - elif ((barrierNumber == -2) and not WAVE_STATUS.PRIV): - SCC = Reg(0) - elif barrierNumber == 0: - SCC = Reg(0) - else: - SCC = Reg(BARRIER_STATE[barrierNumber & 63].signalCnt.u32 == 0) - BARRIER_STATE[barrierNumber & 63].signalCnt += 1 - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOP1Op_S_GET_BARRIER_STATE(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U({ 9'0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 5'0, BARRIER_STATE[barrierNumber & - D0 = Reg(d0) - # --- compiled pseudocode --- - D0.u32 = ({ 0, BARRIER_STATE[barrierNumber & 63].signalCnt.u7, 0, BARRIER_STATE[barrierNumber] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result - -def _SOP1Op_S_ALLOC_VGPR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # n = ReallocVgprs(32'I(S0[8 : 0].u32)); - # if n < 0 then - # SCC = 1'0U - # else - # NUM_VGPRS = n; - # SCC = 1'1U - # endif - S0 = Reg(s0) - SCC = Reg(scc) - # --- compiled pseudocode --- - n = ReallocVgprs((S0[8 : 0].u32)) - if n < 0: - SCC = Reg(0) - else: - NUM_VGPRS = n - SCC = Reg(1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result - -def _SOP1Op_S_SLEEP_VAR(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S0[6:0] determines the sleep duration. The wave sleeps for (64*(S0[6:0]-1) … 64*S0[6:0]) clocks. The exact - S0 = Reg(s0) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result - -def _SOP1Op_S_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CVT_HI_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 
f16_to_f32(S0[31 : 16].f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CVT_HI_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0[31 : 16].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP1Op_S_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP1Op_S_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} SOP1Op_FUNCTIONS = { SOP1Op.S_MOV_B32: _SOP1Op_S_MOV_B32, @@ -1445,10 +513,6 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RFE_B64: _SOP1Op_S_RFE_B64, SOP1Op.S_SENDMSG_RTN_B32: _SOP1Op_S_SENDMSG_RTN_B32, SOP1Op.S_SENDMSG_RTN_B64: _SOP1Op_S_SENDMSG_RTN_B64, - SOP1Op.S_BARRIER_SIGNAL: _SOP1Op_S_BARRIER_SIGNAL, - SOP1Op.S_BARRIER_SIGNAL_ISFIRST: _SOP1Op_S_BARRIER_SIGNAL_ISFIRST, - SOP1Op.S_GET_BARRIER_STATE: _SOP1Op_S_GET_BARRIER_STATE, - SOP1Op.S_ALLOC_VGPR: _SOP1Op_S_ALLOC_VGPR, SOP1Op.S_SLEEP_VAR: _SOP1Op_S_SLEEP_VAR, SOP1Op.S_CEIL_F32: _SOP1Op_S_CEIL_F32, SOP1Op.S_FLOOR_F32: _SOP1Op_S_FLOOR_F32, @@ -1467,802 +531,282 @@ SOP1Op_FUNCTIONS = { SOP1Op.S_RNDNE_F16: _SOP1Op_S_RNDNE_F16, } -def _SOP2Op_S_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 
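
With this refactor each handler returns only the state it actually wrote, keyed by architectural name ('D0', 'SCC', 'EXEC', 'VCC', 'PC'), and tables like SOP1Op_FUNCTIONS map opcodes to handlers. The emulator's real write-back loop is not shown in this hunk; a hypothetical consumer of the new convention could look like:

def step_scalar(op, handlers, state, *operands):
    # Hypothetical write-back loop: look up the handler in a table such as
    # SOP1Op_FUNCTIONS, run it, and copy whatever registers it reports
    # ('D0', 'SCC', 'EXEC', ...) back into the wave-state mapping.
    results = handlers[op](*operands)
    for name, value in results.items():
        state[name] = value
    return state
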
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # SCC = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32) SCC = Reg(((1) if (S1.u32 > S0.u32) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 + S1.i32; - # SCC = ((S0.u32[31] == S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_CO_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 + S1.i32) SCC = Reg(((S0.u32[31] == S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.i32 - S1.i32; - # SCC = ((S0.u32[31] != S1.u32[31]) && (S0.u32[31] != tmp.u32[31])); - # D0.i32 = tmp.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_CO_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.i32 - S1.i32) SCC = Reg(((S0.u32[31] != S1.u32[31]) and (S0.u32[31] != tmp.u32[31]))) D0.i32 = tmp.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + SCC.u64; - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + SCC.u64) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - SCC.u32; - # SCC = 64'U(S1.u32) + SCC.u64 > 64'U(S0.u32) ? 
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - SCC.u32) SCC = Reg(((1) if ((S1.u32) + SCC.u64 > (S0.u32)) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ABSDIFF_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32; - # if D0.i32 < 0 then - # D0.i32 = -D0.i32 - # endif; - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ABSDIFF_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 if D0.i32 < 0: D0.i32 = -D0.i32 SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 << S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 << S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 >> S1[4 : 0].u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 >> S1[4 : 0].u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 >> S1[5 : 0].u32); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 >> S1[5 : 0].u32) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i32) >> S1[4 : 0].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i32) >> S1[4 : 0].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_ASHR_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_ASHR_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i64 = (signext(S0.i64) >> S1[5 : 0].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL1_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 1U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL1_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 1) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL2_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 2U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL2_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 2) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL3_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 3U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL3_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 3) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_LSHL4_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (64'U(S0.u32) << 4U) + 64'U(S1.u32); - # SCC = tmp >= 0x100000000ULL ? 
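
S_ADD_CO_U32, S_ADD_CO_CI_U32 and the S_LSHLn_ADD_U32 family all detect carry the same way: perform the arithmetic in a wider integer and compare against 2**32. With Python's unbounded ints that reduces to the following sketch (names are local, not the emulator's):

def lshl_add_u32(s0: int, s1: int, shift: int = 0, carry_in: int = 0):
    # tmp = (u64(S0) << shift) + u64(S1) + carry; SCC = tmp >= 2**32; D0 = tmp mod 2**32
    tmp = ((s0 & 0xFFFFFFFF) << shift) + (s1 & 0xFFFFFFFF) + carry_in
    return tmp & 0xFFFFFFFF, int(tmp >= 0x100000000)     # (D0, SCC)

assert lshl_add_u32(0xFFFFFFFF, 1) == (0, 1)             # S_ADD_CO_U32 wrapping, carry set
assert lshl_add_u32(0x40000000, 0, shift=2) == (0, 1)    # S_LSHL2_ADD_U32 overflowing
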
1'1U : 1'0U; - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- compiled pseudocode --- +def _SOP2Op_S_LSHL4_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32) << 4) + (S1.u32)) SCC = Reg(((1) if (tmp >= 0x100000000) else (0))) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32; - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32; - # D0.i32 = SCC ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) D0.i32 = ((S0.i32) if (SCC) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32; - # D0.u32 = SCC ? 
S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XOR_B64(S0, S1, S2, D0, SCC, VCC, 
laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 & S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 & S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NAND_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 & S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NAND_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 & S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 | S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 | S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_NOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 | S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_NOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 | S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_XNOR_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ~(S0.u64 ^ S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_XNOR_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ~(S0.u64 ^ S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode 
--- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_AND_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 & ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_AND_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 & ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | ~S1.u32); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | ~S1.u32) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_OR_NOT1_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S0.u64 | ~S1.u64); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_OR_NOT1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S0.u64 | ~S1.u64) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S1[22 : 16].u32) - 1U)); - # SCC = D0.u32 != 0U - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32); - # SCC = D0.i32 != 0 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled 
pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S1[22 : 16].u32) SCC = Reg(D0.i32 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1ULL << S1[22 : 16].u32) - 1ULL)); - # SCC = D0.u64 != 0ULL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_BFE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) SCC = Reg(D0.u64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1LL << S1[22 : 16].u32) - 1LL)); - # D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32); - # SCC = D0.i64 != 0LL - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) +def _SOP2Op_S_BFE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i64 = ((S0.i64 >> S1[5 : 0].u32) & ((1 << S1[22 : 16].u32) - 1)) D0.i64 = signext_from_bit(tmp.i64, S1[22 : 16].u32) SCC = Reg(D0.i64 != 0) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0, 'SCC': SCC} -def _SOP2Op_S_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_BFM_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (((1ULL << S0[5 : 0].u32) - 1ULL) << S1[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_BFM_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (((1 << S0[5 : 0].u32) - 1) << S1[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 * S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 * S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def 
_SOP2Op_S_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = SCC ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (SCC) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CSELECT_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = SCC ? S0.u64 : S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOP2Op_S_CSELECT_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = ((S0.u64) if (SCC) else (S1.u64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_PACK_LL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_LH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[15 : 0].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_LH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[15 : 0].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HH_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[31 : 16].u16, S0[31 : 16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HH_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[31 : 16].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_PACK_HL_B32_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { S1[15 : 0].u16, S0[31 : 
16].u16 } - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_PACK_HL_B32_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(S1[15 : 0].u16, S0[31 : 16].u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -2275,31 +819,9 @@ def _SOP2Op_S_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -2312,114 +834,45 @@ def _SOP2Op_S_MAX_NUM_F32(s0, 
s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _SOP2Op_S_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _SOP2Op_S_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOP2Op_S_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_F16(S0, 
S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -2432,31 +885,9 @@ def _SOP2Op_S_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -2469,55 +900,17 @@ def _SOP2Op_S_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end 
pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MINIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -2532,33 +925,9 @@ def _SOP2Op_S_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAXIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -2573,33 +942,9 @@ def _SOP2Op_S_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MINIMUM_F16(S0, S1, S2, D0, SCC, 
VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -2614,33 +959,9 @@ def _SOP2Op_S_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -2655,45 +976,19 @@ def _SOP2Op_S_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOP2Op_S_ADD_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = S0.u64 + S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_ADD_NC_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = S0.u64 + S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_SUB_NC_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = S0.u64 - S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_SUB_NC_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = S0.u64 - S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _SOP2Op_S_MUL_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = S0.u64 * S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOP2Op_S_MUL_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = S0.u64 * S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} SOP2Op_FUNCTIONS = { SOP2Op.S_ADD_CO_U32: _SOP2Op_S_ADD_CO_U32, @@ -2772,523 +1067,189 @@ SOP2Op_FUNCTIONS = { SOP2Op.S_MUL_U64: _SOP2Op_S_MUL_U64, } -def _SOPCOp_S_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # 
--- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 == S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 != S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 > S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 >= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 < S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.i32 <= S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 == S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 != S1.u32) - # --- end pseudocode --- - 
result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 > S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 >= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 < S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32 <= S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u32[S1.u32[4 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u32[S1.u32[4 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP0_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'0U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP0_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 0) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_BITCMP1_B64(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64[S1.u32[5 : 0]] == 1'1U - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_BITCMP1_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64[S1.u32[5 : 0]] == 1) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 == S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LG_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LG_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.u64 != S1.u64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- 
+def _SOPCOp_S_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f32 <= S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': SCC._val & 1}
-  return result
+  return {'SCC': SCC}

-def _SOPCOp_S_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # SCC = S0.f16 <= S1.f16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  SCC = Reg(scc)
-  # --- compiled pseudocode ---
+def _SOPCOp_S_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f16 <= S1.f16)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': SCC._val & 1}
-  return result
+  return {'SCC': SCC}

-def _SOPCOp_S_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # SCC = S0.f32 > S1.f32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  SCC = Reg(scc)
-  # --- compiled pseudocode ---
+def _SOPCOp_S_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f32 > S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': SCC._val & 1}
-  return result
+  return {'SCC': SCC}

-def _SOPCOp_S_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # SCC = S0.f16 > S1.f16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  SCC = Reg(scc)
-  # --- compiled pseudocode ---
+def _SOPCOp_S_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f16 > S1.f16)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': SCC._val & 1}
-  return result
+  return {'SCC': SCC}

-def _SOPCOp_S_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # SCC = S0.f32 <> S1.f32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  SCC = Reg(scc)
-  # --- compiled pseudocode ---
+def _SOPCOp_S_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f32 != S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': SCC._val & 1}
-  return result
+  return {'SCC': SCC}

-def _SOPCOp_S_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # SCC = S0.f16 <> S1.f16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  SCC = Reg(scc)
-  # --- compiled pseudocode ---
+def _SOPCOp_S_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f16 != S1.f16)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': SCC._val & 1}
-  return result
+  return {'SCC': SCC}

-def _SOPCOp_S_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # SCC = S0.f32 >= S1.f32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  SCC = Reg(scc)
-  # --- compiled pseudocode ---
+def _SOPCOp_S_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f32 >= S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc': SCC._val & 1}
-  return result
+  return {'SCC': SCC}

-def _SOPCOp_S_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # SCC = S0.f16 >= S1.f16
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  SCC = Reg(scc)
-  # --- compiled pseudocode ---
+def _SOPCOp_S_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   SCC = Reg(S0.f16 >= S1.f16)
-  # --- end pseudocode ---
-  result = {'d0': d0, 'scc':
SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg(( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f32)) or isNAN(F(S1.f32)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg((isNAN(F(S0.f16)) or isNAN(F(S1.f16)))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 >= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 >= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- 
compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 != S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 != S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 > S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 > S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 <= S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 <= S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 == S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - 
return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 == S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f32 < S1.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} -def _SOPCOp_S_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # SCC = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPCOp_S_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SCC = Reg( not (S0.f16 < S1.f16)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - return result + return {'SCC': SCC} SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_EQ_I32: _SOPCOp_S_CMP_EQ_I32, @@ -3339,79 +1300,34 @@ SOPCOp_FUNCTIONS = { SOPCOp.S_CMP_NLT_F16: _SOPCOp_S_CMP_NLT_F16, } -def _SOPKOp_S_MOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_VERSION(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Do nothing - for use by tools only - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOPKOp_S_VERSION(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPKOp_S_CMOVK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC then - # D0.i32 = 32'I(signext(S0.i16)) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - # --- compiled pseudocode --- +def _SOPKOp_S_CMOVK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if SCC: D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0} -def _SOPKOp_S_ADDK_CO_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.i32; - # D0.i32 = D0.i32 + 32'I(signext(S0.i16)); - # SCC = ((tmp[31] == S0.i16[15]) && (tmp[31] != D0.i32[31])); - S0 = Reg(s0) - D0 = Reg(d0) - SCC = Reg(scc) - tmp = Reg(0) - # --- 
compiled pseudocode --- +def _SOPKOp_S_ADDK_CO_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.i32) D0.i32 = D0.i32 + (signext(S0.i16)) SCC = Reg(((tmp[31] == S0.i16[15]) and (tmp[31] != D0.i32[31]))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': SCC._val & 1} - return result + return {'D0': D0, 'SCC': SCC} -def _SOPKOp_S_MULK_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = D0.i32 * 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _SOPKOp_S_MULK_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = D0.i32 * (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _SOPKOp_S_CALL_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = PC + 4LL; - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - D0 = Reg(d0) +def _SOPKOp_S_CALL_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- D0.i64 = PC + 4 PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0, 'PC': PC} SOPKOp_FUNCTIONS = { SOPKOp.S_MOVK_I32: _SOPKOp_S_MOVK_I32, @@ -3422,194 +1338,85 @@ SOPKOp_FUNCTIONS = { SOPKOp.S_CALL_B64: _SOPKOp_S_CALL_B64, } -def _SOPPOp_S_NOP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # for i in 0U : SIMM16.u16[3 : 0].u32 do - # endfor +def _SOPPOp_S_NOP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) # --- compiled pseudocode --- for i in range(0, int(SIMM16.u16[3 : 0].u32)+1): pass - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _SOPPOp_S_DELAY_ALU(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # instruction may be omitted. 
For wave64 the compiler may not know the status of the EXEC mask and hence - # // 1 cycle delay here - # // 2 cycles delay here - EXEC = Reg(exec_mask) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result +def _SOPPOp_S_DELAY_ALU(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPPOp_S_TRAP(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // PC passed into trap handler points to S_TRAP itself, - # PC = TBA.i64; - # // trap base address - PC = Reg(pc) - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result +def _SOPPOp_S_TRAP(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {'PC': PC} -def _SOPPOp_S_BARRIER_WAIT(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // barrierBit 0: reserved - # // barrierBit 1: workgroup - # // barrierBit 2: trap - # // Implemented as a power-saving idle - # --- compiled pseudocode --- - - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result +def _SOPPOp_S_BARRIER_WAIT(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + return {} -def _SOPPOp_S_BRANCH(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL; +def _SOPPOp_S_BRANCH(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_SCC0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_SCC1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if SCC == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - SCC = Reg(scc) +def _SOPPOp_S_CBRANCH_SCC1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) # --- compiled pseudocode --- if SCC == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': SCC._val & 1} - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = 
_pc - return result + return {'SCC': SCC, 'PC': PC} -def _SOPPOp_S_CBRANCH_VCCZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 1 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_VCCNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # If VCCZ is 0 then jump to a constant offset relative to the current PC. - # if VCCZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - VCC = Reg(vcc) +def _SOPPOp_S_CBRANCH_VCCNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) VCCZ = Reg(1 if VCC._val == 0 else 0) # --- compiled pseudocode --- if VCCZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'1U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 1: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} -def _SOPPOp_S_CBRANCH_EXECNZ(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if EXECZ.u1 == 1'0U then - # PC = PC + signext(SIMM16.i16 * 16'4) + 4LL - # else - # PC = PC + 4LL - # endif - EXEC = Reg(exec_mask) +def _SOPPOp_S_CBRANCH_EXECNZ(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM16 = Reg(literal) - PC = Reg(pc) EXECZ = Reg(1 if EXEC._val == 0 else 0) # --- compiled pseudocode --- if EXECZ.u1 == 0: PC = Reg(PC + signext(SIMM16.i16 * 4) + 4) else: PC = Reg(PC + 4) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'PC': PC} 
SOPPOp_FUNCTIONS = { SOPPOp.S_NOP: _SOPPOp_S_NOP, @@ -3625,40 +1432,11 @@ SOPPOp_FUNCTIONS = { SOPPOp.S_CBRANCH_EXECNZ: _SOPPOp_S_CBRANCH_EXECNZ, } -def _VOP1Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP1Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -3673,935 +1451,368 @@ def _VOP1Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 
u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, 
src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def 
_VOP1Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end 
pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) 
- # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == 
-INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, 
pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} 
-def _VOP1Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and 
(fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 16'0; - # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); - # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); - # D0.b16 = tmp.b16 - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) D0.b16 = tmp.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b32; - # D0.b32 = S0.b32; - # S0.b32 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): 
tmp = Reg(D0.b32) D0.b32 = S0.b32 S0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_SWAP_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = D0.b16; - # D0.b16 = S0.b16; - # S0.b16 = tmp - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP1Op_V_SWAP_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(D0.b16) D0.b16 = S0.b16 S0.b16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = ~S0.u16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ~S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { 16'0, S0.u16 } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP1Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(0, S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) - # else - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_CVT_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -4612,23 +1823,9 @@ def _VOP1Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) else: D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = 
bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) - # else - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP1Op_V_CVT_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -4639,41 +1836,23 @@ def _VOP1Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) else: D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); - # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP1Op_V_CVT_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP1Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); - # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP1Op_V_CVT_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP1Op_FUNCTIONS = { VOP1Op.V_MOV_B32: _VOP1Op_V_MOV_B32, @@ -4760,172 +1939,58 @@ VOP1Op_FUNCTIONS = { VOP1Op.V_CVT_PK_F32_BF8: _VOP1Op_V_CVT_PK_F32_BF8, } -def _VOP2Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def 
_VOP2Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && - # !sign(S1.f64))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -4938,32 +2003,9 @@ def _VOP2Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 
- # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && - # sign(S1.f64))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -4976,76 +2018,25 @@ def _VOP2Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -5058,31 +2049,9 @@ def _VOP2Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -5095,263 +2064,95 @@ def _VOP2Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled 
pseudocode --- +def _VOP2Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP2Op_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP2Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, SIMM32.f32, S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f32 = fma(S0.f32, S1.f32, SIMM32.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': 
D0} -def _VOP2Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP2Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -5364,31 +2165,9 @@ def _VOP2Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -5401,112 +2180,48 @@ def _VOP2Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + 
return {'D0': D0} -def _VOP2Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAMK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAMK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, SIMM32.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_FMAAK_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP2Op_V_FMAAK_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SIMM32 = Reg(literal) # --- compiled pseudocode --- D0.f16 = fma(S0.f16, S1.f16, SIMM32.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP2Op_V_PK_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16); - # D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP2Op_V_PK_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, D0[31 : 16].f16) D0[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, D0[15 : 0].f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP2Op_FUNCTIONS = { VOP2Op.V_CNDMASK_B32: _VOP2Op_V_CNDMASK_B32, @@ -5560,1671 +2275,319 @@ VOP2Op_FUNCTIONS = { VOP2Op.V_PK_FMAC_F16: _VOP2Op_V_PK_FMAC_F16, } -def _VOP3Op_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = not (S0.f64 < S1.f64)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.i16 < S1.i16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i16 < S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.i16 == S1.i16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i16 == S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = S0.i16 <= S1.i16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i16 <= S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
-  # D0.u64[laneId] = S0.i16 > S1.i16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i16 > S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
-  # D0.u64[laneId] = S0.i16 <> S1.i16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i16 != S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = S0.i16 >= S1.i16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i16 >= S1.i16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.u16 < S1.u16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u16 < S1.u16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.u16 == S1.u16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u16 == S1.u16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = S0.u16 <= S1.u16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u16 <= S1.u16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC
-  # D0.u64[laneId] = S0.u16 > S1.u16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u16 > S1.u16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC
-  # D0.u64[laneId] = S0.u16 <> S1.u16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u16 != S1.u16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.u64[laneId] = S0.u16 >= S1.u16;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.u16 >= S1.u16
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.i32 < S1.i32;
-  # // D0 = VCC in VOPC encoding.
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  laneId = lane
-  PC = Reg(pc)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.u64[laneId] = S0.i32 < S1.i32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1
-  result['vcc_lane'] = (D0._val >> lane) & 1
-  result['d0_64'] = True
-  _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000
-  result['new_pc'] = _pc
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a
-  # D0.u64[laneId] = S0.i32 == S1.i32;
-  # // D0 = VCC in VOPC encoding.
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -7238,54 +2601,9 @@ def _VOP3Op_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. 
- # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -7299,54 +2617,9 @@ def _VOP3Op_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOP3Op_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -7360,1091 +2633,321 @@ def _VOP3Op_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOP3Op_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F16(S0, S1, S2, D0, 
SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs 
this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || 
isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 
'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != 
S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- 
+def _VOP3Op_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
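# Illustrative sketch, not the emulator's actual Reg/EXEC types: the V_CMPX
# helpers above write each lane's comparison result into one bit of EXEC
# (EXEC.u64[laneId] = ...), and the O/U and negated (NGE, NLT, ...) variants
# exist because NaN makes !(a >= b) differ from a < b. Assuming plain Python
# ints and floats:
import math

def set_exec_bit(exec_mask: int, lane: int, cond: bool) -> int:
    # set or clear bit `lane` of a 64-bit EXEC mask according to `cond`
    return (exec_mask | (1 << lane)) if cond else (exec_mask & ~(1 << lane))

def cmpx_o_f64(s0: float, s1: float) -> bool:   # ordered: neither input is NaN
    return not (math.isnan(s0) or math.isnan(s1))

def cmpx_u_f64(s0: float, s1: float) -> bool:   # unordered: either input is NaN
    return math.isnan(s0) or math.isnan(s1)

def cmpx_nge_f64(s0: float, s1: float) -> bool:
    # !(s0 >= s1) is True when either input is NaN, unlike s0 < s1 -- which is
    # why the pseudocode keeps the negated form instead of rewriting it as "<"
    return not (s0 >= s1)

exec_mask = 0
exec_mask = set_exec_bit(exec_mask, 0, cmpx_nge_f64(float("nan"), 1.0))  # lane 0 -> 1
exec_mask = set_exec_bit(exec_mask, 1, 0.0 < 1.0)                        # lane 1 -> 1
assert exec_mask == 0b11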
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
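# Illustrative sketch, not part of the generated helpers: the I16/U16 CMPX
# pairs above differ only in how the same 16-bit pattern is interpreted. A
# hypothetical pair of decoders showing why 0xFFFF compares as -1 signed but
# as 65535 unsigned:
def as_i16(bits: int) -> int:
    bits &= 0xFFFF
    return bits - 0x10000 if bits & 0x8000 else bits

def as_u16(bits: int) -> int:
    return bits & 0xFFFF

a, b = 0xFFFF, 0x0001
assert as_i16(a) < as_i16(b)        # signed:   -1 < 1, so V_CMPX_LT_I16 sets the lane bit
assert not (as_u16(a) < as_u16(b))  # unsigned: 65535 < 1 is false for V_CMPX_LT_U16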
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i64 == S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 > S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <> S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 >= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 < S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 > S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 >= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 
2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -8458,46 +2961,9 @@ def _VOP3Op_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -8511,46 +2977,9 @@ def _VOP3Op_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 
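# Illustrative sketch, assuming the IEEE-754 f32 bit layout and invented helper
# names: V_CMPX_CLASS_F32 above classifies S0 into one of ten categories
# (signaling NaN, quiet NaN, +-inf, +-normal, +-denormal, +-zero) and returns
# the bit of the S1 mask selected by that category.
import struct

def classify_f32(x: float) -> int:
    bits = struct.unpack("<I", struct.pack("<f", x))[0]
    sign, exp, mant = bits >> 31, (bits >> 23) & 0xFF, bits & 0x7FFFFF
    if exp == 0xFF and mant: return 1 if mant & 0x400000 else 0  # quiet / signaling NaN
    if exp == 0xFF:          return 2 if sign else 9             # +-inf
    if exp > 0:              return 3 if sign else 8             # +-normal
    if mant:                 return 4 if sign else 7             # +-denormal
    return 5 if sign else 6                                      # +-zero

def cmpx_class_f32(s0: float, s1_mask: int) -> bool:
    return bool((s1_mask >> classify_f32(s0)) & 1)

assert cmpx_class_f32(float("inf"), 1 << 9)   # +inf matches bit 9
assert not cmpx_class_f32(-0.0, 1 << 6)       # -0.0 is class 5, not class 6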
3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -8564,45 +2993,13 @@ def _VOP3Op_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOP3Op_V_MOV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b32 = S0.b32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b32 = S0.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE64 then - # // 64 lanes - # if EXEC == 0x0LL then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b64(EXEC)); - # // Lowest active lane - # endif - # else - # // 32 lanes - # if EXEC_LO.i32 == 0 then - # lane = 0U; - # // Force lane 0 if all lanes are disabled - # else - # lane = 32'U(s_ff1_i32_b32(EXEC_LO)); - # // Lowest active lane - # endif - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - D0 = Reg(d0) - EXEC = Reg(exec_mask) +def _VOP3Op_V_READFIRSTLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) EXEC_LO = SliceProxy(EXEC, 31, 0) # --- compiled pseudocode --- @@ -8617,905 +3014,356 @@ def _VOP3Op_V_READFIRSTLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, liter else: lane = (s_ff1_i32_b32(EXEC_LO)) D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if EXEC._val != exec_mask: result['exec'] = EXEC._val - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f64_to_i32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f64_to_i32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = i32_to_f64(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = i32_to_f64(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # 
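# Illustrative sketch, assuming EXEC is a plain integer mask: V_READFIRSTLANE_B32
# above broadcasts the value held by the lowest active lane (lowest set bit of
# EXEC), falling back to lane 0 when EXEC is zero. The s_ff1 lane selection in
# Python terms:
def lowest_active_lane(exec_mask: int) -> int:
    # (x & -x) isolates the lowest set bit; bit_length() - 1 is its index
    return 0 if exec_mask == 0 else (exec_mask & -exec_mask).bit_length() - 1

assert lowest_active_lane(0b0000) == 0   # all lanes disabled: forced to lane 0
assert lowest_active_lane(0b1100) == 2   # lanes 2 and 3 active: pick lane 2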
D0.f32 = i32_to_f32(S0.i32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = i32_to_f32(S0.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f32_to_u32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f32_to_u32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = f32_to_f16(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = f32_to_f16(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f16_to_f32(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f16_to_f32(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NEAREST_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32 + 0.5F)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NEAREST_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = f32_to_i32(floor(S0.f32 + 0.5)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_FLOOR_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = f32_to_i32(floor(S0.f32)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_FLOOR_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = 
f32_to_i32(floor(S0.f32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = f64_to_f32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = f64_to_f32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = f32_to_f64(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = f32_to_f64(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE0(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[7 : 0].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE0(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[7 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE1(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[15 : 8].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE1(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[15 : 8].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE2(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[23 : 16].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE2(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[23 : 16].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_UBYTE3(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = u32_to_f32(S0[31 : 24].u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F32_UBYTE3(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = u32_to_f32(S0[31 : 24].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = f64_to_u32(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = f64_to_u32(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F64_U32(s0, s1, s2, 
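# Illustrative sketch: the four V_CVT_F32_UBYTE0..3 helpers above each convert
# one byte of S0 to float, and the opcode suffix just selects bits [8k+7:8k].
# A compact stand-in, assuming S0 is a plain 32-bit integer:
def cvt_f32_ubyte(s0: int, k: int) -> float:
    return float((s0 >> (8 * k)) & 0xFF)

assert cvt_f32_ubyte(0x11223344, 0) == 68.0   # byte 0 = 0x44
assert cvt_f32_ubyte(0x11223344, 3) == 17.0   # byte 3 = 0x11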
d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = u32_to_f64(S0.u32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = u32_to_f64(S0.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 > 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 > 0.0) and (S0.f64 != D0.f64)): D0.f64 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = floor(S0.f64 + 0.5); - # if (isEven(floor(S0.f64)) && (fract(S0.f64) == 0.5)) then - # D0.f64 -= 1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = floor(S0.f64 + 0.5) if (isEven(floor(S0.f64)) and (fract(S0.f64) == 0.5)): D0.f64 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = trunc(S0.f64); - # if ((S0.f64 < 0.0) && (S0.f64 != D0.f64)) then - # D0.f64 += -1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = trunc(S0.f64) if ((S0.f64 < 0.0) and (S0.f64 != D0.f64)): D0.f64 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MOV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.b16 = S0.b16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MOV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.b16 = S0.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + -floor(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, 
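# Illustrative sketch: V_RNDNE_F64 above implements round-half-to-even as
# floor(x + 0.5) with a correction when the value sits exactly on a .5 boundary
# next to an even integer. Python's round() uses the same tie-breaking rule,
# which makes a handy cross-check (assuming plain Python floats):
import math

def rndne(x: float) -> float:
    d = math.floor(x + 0.5)
    if math.floor(x) % 2 == 0 and (x - math.floor(x)) == 0.5:
        d -= 1.0
    return float(d)

for v in (0.5, 1.5, 2.5, -0.5, -1.5, 3.7):
    assert rndne(v) == float(round(v)), v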
src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + -floor(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 > 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 > 0.0) and (S0.f32 != D0.f32)): D0.f32 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = floor(S0.f32 + 0.5F); - # if (isEven(64'F(floor(S0.f32))) && (fract(S0.f32) == 0.5F)) then - # D0.f32 -= 1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = floor(S0.f32 + 0.5) if (isEven(F(floor(S0.f32))) and (fract(S0.f32) == 0.5)): D0.f32 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = trunc(S0.f32); - # if ((S0.f32 < 0.0F) && (S0.f32 != D0.f32)) then - # D0.f32 += -1.0F - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = trunc(S0.f32) if ((S0.f32 < 0.0) and (S0.f32 != D0.f32)): D0.f32 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled 
pseudocode --- +def _VOP3Op_V_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_IFLAG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32; - # // Can only raise integer DIV_BY_ZERO exception - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_IFLAG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / S0.f64 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / S0.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = 1.0 / sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = 1.0 / sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = sqrt(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = sqrt(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sin(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sin(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - 
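# Illustrative sketch: the transcendental helpers above are base-2 (V_EXP_F32
# is 2**x, V_LOG_F32 is log2), and V_RCP/V_RSQ are plain reciprocal and
# reciprocal square root. Hardware produces approximations of these, but the
# pseudocode above writes them as exact expressions:
import math

v_exp_f32 = lambda x: 2.0 ** x
v_log_f32 = lambda x: math.log2(x)
v_rsq_f32 = lambda x: 1.0 / math.sqrt(x)

assert v_exp_f32(3.0) == 8.0
assert v_log_f32(8.0) == 3.0
assert v_rsq_f32(4.0) == 0.5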
return result + return {'D0': D0} -def _VOP3Op_V_COS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = cos(S0.f32 * 32'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_COS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = cos(S0.f32 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~S0.u32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32[31 : 0] = S0.u32[0 : 31] - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32[31 : 0] = S0.u32[0 : 31] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLZ_I32_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from MSB - # if S0.u32[31 - i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLZ_I32_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[31 - i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CTZ_I32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if no ones are found - # for i in 0 : 31 do - # // Search from LSB - # if S0.u32[i] == 1'1U then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CTZ_I32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(0, int(31)+1): if S0.u32[i] == 1: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CLS_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = -1; - # // Set if all bits are the same - # for i in 1 : 31 do - # // Search from MSB - # if S0.i32[31 - i] != S0.i32[31] then - # D0.i32 = i; - # endif - # endfor - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CLS_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = -1 for i in range(1, int(31)+1): if S0.i32[31 - i] != S0.i32[31]: D0.i32 = i; break - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || 
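# Illustrative sketch: V_CLZ_I32_U32 / V_CTZ_I32_B32 above scan for the first
# set bit from the MSB / LSB and return -1 when the input is zero (the compiled
# loops break on the first hit). Equivalent closed forms on Python ints:
def v_clz_i32_u32(x: int) -> int:
    x &= 0xFFFFFFFF
    return -1 if x == 0 else 31 - (x.bit_length() - 1)

def v_ctz_i32_b32(x: int) -> int:
    x &= 0xFFFFFFFF
    return -1 if x == 0 else (x & -x).bit_length() - 1

assert v_clz_i32_u32(0) == -1 and v_ctz_i32_b32(0) == -1
assert v_clz_i32_u32(1) == 31 and v_ctz_i32_b32(0x80000000) == 31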
(S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f64) - 1023 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.i32 = 0 else: D0.i32 = exponent(S0.f64) - 1023 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S0.f64 == +INF) || (S0.f64 == -INF) || isNAN(S0.f64)) then - # D0.f64 = S0.f64 - # else - # D0.f64 = mantissa(S0.f64) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S0.f64 == INF) or (S0.f64 == (-INF)) or isNAN(S0.f64)): D0.f64 = S0.f64 else: D0.f64 = mantissa(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + -floor(S0.f64) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + -floor(S0.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I32_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.i32 = 0 - # else - # D0.i32 = exponent(S0.f32) - 127 + 1 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I32_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.i32 = 0 else: D0.i32 = exponent(S0.f32) - 127 + 1 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == +INF) || (64'F(S0.f32) == -INF) || isNAN(64'F(S0.f32))) then - # D0.f32 = S0.f32 - # else - # D0.f32 = mantissa(S0.f32) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == INF) or (F(S0.f32) == (-INF)) or isNAN(F(S0.f32))): D0.f32 = S0.f32 else: D0.f32 = mantissa(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MOVRELS_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # addr = SRC0.u32; - # // Raw value from instruction - # D0.b32 = VGPR[laneId][addr].b32 - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_MOVRELS_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- addr = SRC0.u32 D0.b32 = VGPR[laneId][addr].b32 - # 
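# Illustrative sketch: the FREXP pair above splits a float into a mantissa in
# [0.5, 1) and an integer exponent (exponent field minus bias, plus one), which
# is the same convention as Python's math.frexp for finite, non-zero inputs;
# inf and NaN are passed through with exponent 0:
import math

def v_frexp_f64(x: float) -> tuple[float, int]:
    if math.isinf(x) or math.isnan(x):
        return x, 0
    return math.frexp(x)

mant, exp = v_frexp_f64(6.0)
assert (mant, exp) == (0.75, 3) and mant * 2.0 ** exp == 6.0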
--- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = u16_to_f16(S0.u16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = u16_to_f16(S0.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F16_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = i16_to_f16(S0.i16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_F16_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = i16_to_f16(S0.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_u16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_u16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_i16(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_i16(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def 
_VOP3Op_V_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_MANT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.f16 = S0.f16 - # else - # D0.f16 = mantissa(S0.f16) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_MANT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.f16 = S0.f16 else: D0.f16 = mantissa(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FREXP_EXP_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f16) == +INF) || (64'F(S0.f16) == -INF) || isNAN(64'F(S0.f16))) then - # D0.i16 = 16'0 - # else - # D0.i16 = 16'I(exponent(S0.f16) - 15 + 1) - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FREXP_EXP_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f16) == INF) or (F(S0.f16) == (-INF)) or isNAN(F(S0.f16))): D0.i16 = 0 else: D0.i16 = (exponent(S0.f16) - 15 + 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FLOOR_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 < 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += -16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FLOOR_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 < 0.0) and (S0.f16 != D0.f16)): D0.f16 += -1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CEIL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16); - # if ((S0.f16 > 16'0.0) && (S0.f16 != D0.f16)) then - # D0.f16 += 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CEIL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) if ((S0.f16 > 0.0) and (S0.f16 != D0.f16)): D0.f16 += 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRUNC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = trunc(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRUNC_F16(S0, S1, S2, D0, SCC, VCC, laneId, 
EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = trunc(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_RNDNE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = floor(S0.f16 + 16'0.5); - # if (isEven(64'F(floor(S0.f16))) && (fract(S0.f16) == 16'0.5)) then - # D0.f16 -= 16'1.0 - # endif - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_RNDNE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = floor(S0.f16 + 0.5) if (isEven(F(floor(S0.f16))) and (fract(S0.f16) == 0.5)): D0.f16 -= 1.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FRACT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + -floor(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FRACT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + -floor(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SIN_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sin(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SIN_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sin(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_COS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = cos(S0.f16 * 16'F(PI * 2.0)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_COS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = cos(S0.f16 * F(PI * 2.0)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAT_PK_U8_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 16'0; - # tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16); - # tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16); - # D0.b16 = tmp.b16 - S0 = Reg(s0) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAT_PK_U8_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) tmp[7 : 0].u8 = SAT8(S0[15 : 0].i16) tmp[15 : 8].u8 = SAT8(S0[31 : 16].i16) D0.b16 = tmp.b16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = f16_to_snorm(S0.f16) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = f16_to_snorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = f16_to_unorm(S0.f16) - S0 
= Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = f16_to_unorm(S0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_NOT_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = ~S0.u16 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_NOT_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ~S0.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(signext(S0.i16)) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_I32_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (signext(S0.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0 = { 16'0, S0.u16 } - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0 = Reg(_pack(0, S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].fp8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].fp8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) - # else - # D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_CVT_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -9526,23 +3374,9 @@ def _VOP3Op_V_CVT_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].fp8) else: D0.f32 = fp8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if OPSEL[1 : 0].u2 == 2'0U then - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][7 : 0].bf8) - # elsif OPSEL[1 : 0].u2 == 2'2U then - # // Byte select bits are reversed - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][15 : 8].bf8) - # elsif OPSEL[1 : 0].u2 == 2'1U then - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) - # else - # D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # endif - D0 = Reg(d0) - laneId = lane +def _VOP3Op_V_CVT_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if OPSEL[1 : 0].u2 == 0: @@ -9553,208 +3387,76 @@ def 
_VOP3Op_V_CVT_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][23 : 16].bf8) else: D0.f32 = bf8_to_f32(VGPR[laneId][SRC0.u32][31 : 24].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_F32_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8); - # D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP3Op_V_CVT_PK_F32_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = fp8_to_f32(tmp[7 : 0].fp8) D0[63 : 32].f32 = fp8_to_f32(tmp[15 : 8].fp8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_F32_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = OPSEL[0].u1 ? VGPR[laneId][SRC0.u32][31 : 16] : VGPR[laneId][SRC0.u32][15 : 0]; - # D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8); - # D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - D0 = Reg(d0) - tmp = Reg(0) - laneId = lane +def _VOP3Op_V_CVT_PK_F32_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- tmp = Reg(((VGPR[laneId][SRC0.u32][31 : 16]) if (OPSEL[0].u1) else (VGPR[laneId][SRC0.u32][15 : 0]))) D0[31 : 0].f32 = bf8_to_f32(tmp[7 : 0].bf8) D0[63 : 32].f32 = bf8_to_f32(tmp[15 : 8].bf8) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CNDMASK_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = VCC.u64[laneId] ? 
S1.u32 : S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S1.u32) if (VCC.u64[laneId]) else (S0.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 + S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 + S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 + S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 + S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 - S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 - S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S1.f32 - S0.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S1.f32 - S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = 0.0F - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = 0.0 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def 
_VOP3Op_V_MUL_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i24) * 64'I(S1.i24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i24) * (S1.i24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u24) * 64'U(S1.u24)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u24) * (S1.u24)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && - # !sign(S1.f64))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -9767,32 +3469,9 @@ def _VOP3Op_V_MIN_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 
- # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(S0.f64) && isNAN(S1.f64)) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isNAN(S0.f64) then - # D0.f64 = S1.f64 - # elsif isNAN(S1.f64) then - # D0.f64 = S0.f64 - # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && - # sign(S1.f64))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_NUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if (isNAN(S0.f64) and isNAN(S1.f64)): @@ -9805,76 +3484,25 @@ def _VOP3Op_V_MAX_NUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 < S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 < S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 >= S1.i32 ? S0.i32 : S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = ((S0.i32) if (S0.i32 >= S1.i32) else (S1.i32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 < S1.u32 ? S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 < S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 >= S1.u32 ? 
S0.u32 : S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32) if (S0.u32 >= S1.u32) else (S1.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -9887,31 +3515,9 @@ def _VOP3Op_V_MIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f32)) && isNAN(64'F(S1.f32))) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isNAN(64'F(S0.f32)) then - # D0.f32 = S1.f32 - # elsif isNAN(64'F(S1.f32)) then - # D0.f32 = S0.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f32)) and isNAN(F(S1.f32))): @@ -9924,179 +3530,65 @@ def _VOP3Op_V_MAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 << S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 << S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S1.u32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled 
pseudocode --- +def _VOP3Op_V_LSHRREV_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S1.u32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = (S1.i32 >> S0[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S1.i32 >> S0[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 & S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 & S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XNOR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ~(S0.u32 ^ S1.u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XNOR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ~(S0.u32 ^ S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 << S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 << S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 - # --- 
end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 - S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 - S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_NC_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S1.u32 - S0.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_NC_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S1.u32 - S0.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, D0.f32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, D0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_RTZ_F16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # prev_mode = ROUND_MODE; - # tmp[15 : 0].f16 = f32_to_f16(S0.f32); - # tmp[31 : 16].f16 = f32_to_f16(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_RTZ_F16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- prev_mode = ROUND_MODE tmp[15 : 0].f16 = f32_to_f16(S0.f32) tmp[31 : 16].f16 = f32_to_f16(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -10109,31 +3601,9 @@ def _VOP3Op_V_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if 
(isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if (isNAN(64'F(S0.f16)) && isNAN(64'F(S1.f16))) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isNAN(64'F(S0.f16)) then - # D0.f16 = S1.f16 - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = S0.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if (isNAN(F(S0.f16)) and isNAN(F(S1.f16))): @@ -10146,150 +3616,48 @@ def _VOP3Op_V_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 + S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 + S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 - S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 - S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUBREV_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S1.f16 - S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUBREV_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S1.f16 - S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMAC_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, D0.f16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMAC_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, D0.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = S0.f16 * 16'F(2.0F ** 32'I(S1.i16)) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = S0.f16 * F(2.0 ** (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_DX9_ZERO_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((64'F(S0.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # // DX9 rules, 0.0 * x = 0.0 - # D0.f32 = S2.f32 - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_DX9_ZERO_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((F(S0.f32) == 0.0) or (F(S1.f32) == 0.0)): D0.f32 = S2.f32 else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i24) * 32'I(S1.i24) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i24) * (S1.i24) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U24(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u24) * 32'U(S1.u24) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U24(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u24) * (S1.u24) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // Set D0.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
- # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = 5.0F - # else - # D0.f32 = 4.0F - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = 3.0F - # else - # D0.f32 = 2.0F - # endif - # else - # if S0.f32 < 0.0F then - # D0.f32 = 1.0F - # else - # D0.f32 = 0.0F - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEID_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = 5.0 @@ -10305,36 +3673,9 @@ def _VOP3Op_V_CUBEID_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = 1.0 else: D0.f32 = 0.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap S coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # if S2.f32 < 0.0F then - # D0.f32 = -S0.f32 - # else - # D0.f32 = S0.f32 - # endif - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S0.f32 - # else - # if S0.f32 < 0.0F then - # D0.f32 = S2.f32 - # else - # D0.f32 = -S2.f32 - # endif - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBESC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): if S2.f32 < 0.0: D0.f32 = -S0.f32 @@ -10347,32 +3688,9 @@ def _VOP3Op_V_CUBESC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S2.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = cubemap T coordinate. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). - # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = -S1.f32 - # elsif abs(S1.f32) >= abs(S0.f32) then - # if S1.f32 < 0.0F then - # D0.f32 = -S2.f32 - # else - # D0.f32 = S2.f32 - # endif - # else - # D0.f32 = -S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBETC_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = -S1.f32 elif abs(S1.f32) >= abs(S0.f32): @@ -10382,377 +3700,128 @@ def _VOP3Op_V_CUBETC_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGP D0.f32 = S2.f32 else: D0.f32 = -S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CUBEMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // D0.f = 2.0 * cubemap major axis. - # // XYZ coordinate is given in (S0.f, S1.f, S2.f). 
- # // S0.f = x - # // S1.f = y - # // S2.f = z - # if ((abs(S2.f32) >= abs(S0.f32)) && (abs(S2.f32) >= abs(S1.f32))) then - # D0.f32 = S2.f32 * 2.0F - # elsif abs(S1.f32) >= abs(S0.f32) then - # D0.f32 = S1.f32 * 2.0F - # else - # D0.f32 = S0.f32 * 2.0F - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_CUBEMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((abs(S2.f32) >= abs(S0.f32)) and (abs(S2.f32) >= abs(S1.f32))): D0.f32 = S2.f32 * 2.0 elif abs(S1.f32) >= abs(S0.f32): D0.f32 = S1.f32 * 2.0 else: D0.f32 = S0.f32 * 2.0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1U << S2[4 : 0].u32) - 1U)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)); - # D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_BFE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp.i32 = ((S0.i32 >> S1[4 : 0].u32) & ((1 << S2[4 : 0].u32) - 1)) D0.i32 = signext_from_bit(tmp.i32, S2[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFI_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFI_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | (~S0.u32 & S2.u32)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + 
return {'D0': D0} -def _VOP3Op_V_LERP_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = ((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1U << 24U); - # tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1U << 16U); - # tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1U << 8U); - # tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1U); - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_LERP_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(((S0.u32[31 : 24] + S1.u32[31 : 24] + S2.u32[24].u8) >> 1 << 24)) tmp += ((S0.u32[23 : 16] + S1.u32[23 : 16] + S2.u32[16].u8) >> 1 << 16) tmp += ((S0.u32[15 : 8] + S1.u32[15 : 8] + S2.u32[8].u8) >> 1 << 8) tmp += ((S0.u32[7 : 0] + S1.u32[7 : 0] + S2.u32[0].u8) >> 1) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBIT_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> S2.u32[4 : 0]) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBIT_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> S2.u32[4 : 0]) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ALIGNBYTE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(({ S0.u32, S1.u32 } >> (S2.u32[1 : 0] * 8U)) & 0xffffffffLL) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ALIGNBYTE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((_pack32(S0.u32, S1.u32) >> (S2.u32[1 : 0] * 8)) & 0xffffffff) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MULLIT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if ((S1.f32 == -MAX_FLOAT_F32) || (64'F(S1.f32) == -INF) || isNAN(64'F(S1.f32)) || (S2.f32 <= 0.0F) || - # isNAN(64'F(S2.f32))) then - # D0.f32 = -MAX_FLOAT_F32 - # else - # D0.f32 = S0.f32 * S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MULLIT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if ((S1.f32 == -MAX_FLOAT_F32) or (F(S1.f32) == (-INF)) or isNAN(F(S1.f32)) or (S2.f32 <= 0.0) or isNAN(F(S2.f32))): D0.f32 = -MAX_FLOAT_F32 else: D0.f32 = S0.f32 * S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': 
D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32 then - # D0.i32 = v_max_i32(S1.i32, S2.i32) - # elsif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32 then - # D0.i32 = v_max_i32(S0.i32, S2.i32) - # else - # D0.i32 = v_max_i32(S0.i32, S1.i32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i32(S0.i32, S1.i32, S2.i32) == S0.i32: D0.i32 = v_max_i32(S1.i32, S2.i32) elif v_max3_i32(S0.i32, S1.i32, S2.i32) == S1.i32: D0.i32 = v_max_i32(S0.i32, S2.i32) else: D0.i32 = v_max_i32(S0.i32, S1.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32 then - # D0.u32 = v_max_u32(S1.u32, S2.u32) - # elsif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32 then - # D0.u32 = v_max_u32(S0.u32, S2.u32) - # else - # D0.u32 = v_max_u32(S0.u32, S1.u32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u32(S0.u32, S1.u32, S2.u32) == S0.u32: D0.u32 = v_max_u32(S1.u32, S2.u32) elif v_max3_u32(S0.u32, S1.u32, S2.u32) == S1.u32: D0.u32 = v_max_u32(S0.u32, S2.u32) else: D0.u32 = v_max_u32(S0.u32, S1.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, 
VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += (ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])) tmp += (ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])) tmp += (ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])) tmp += (ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_HI_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (32'U(v_sad_u8(S0, S1, 0U)) << 16U) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_HI_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((v_sad_u8(S0, S1, 0)) << 16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16); - # tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ABSDIFF(S0[15 : 0].u16, S1[15 : 0].u16) tmp += ABSDIFF(S0[31 : 16].u16, S1[31 : 16].u16) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ABSDIFF(S0.u32, S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_U8_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = (S2.u32 & 32'U(~(0xff << (S1.u32[1 : 0].u32 * 8U)))); - # tmp = (tmp | ((32'U(f32_to_u8(S0.f32)) & 255U) << (S1.u32[1 : 0].u32 * 8U))); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_CVT_PK_U8_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S2.u32 & (~(0xff << (S1.u32[1 : 0].u32 * 8))))) tmp = Reg((tmp | (((f32_to_u8(S0.f32)) & 255) << (S1.u32[1 : 0].u32 * 8)))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
-  # sign_out = (sign(S1.f32) ^ sign(S2.f32));
-  # if isNAN(64'F(S2.f32)) then
-  # D0.f32 = 32'F(cvtToQuietNAN(64'F(S2.f32)))
-  # elsif isNAN(64'F(S1.f32)) then
-  # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32)))
-  # elsif ((64'F(S1.f32) == 0.0) && (64'F(S2.f32) == 0.0)) then
-  # // 0/0
-  # D0.f32 = 32'F(0xffc00000)
-  # elsif ((64'F(abs(S1.f32)) == +INF) && (64'F(abs(S2.f32)) == +INF)) then
-  # // inf/inf
-  # D0.f32 = 32'F(0xffc00000)
-  # elsif ((64'F(S1.f32) == 0.0) || (64'F(abs(S2.f32)) == +INF)) then
-  # // x/0, or inf/y
-  # D0.f32 = sign_out ? -INF.f32 : +INF.f32
-  # elsif ((64'F(abs(S1.f32)) == +INF) || (64'F(S2.f32) == 0.0)) then
-  # // x/inf, 0/y
-  # D0.f32 = sign_out ? -0.0F : 0.0F
-  # elsif exponent(S2.f32) - exponent(S1.f32) < -150 then
-  # D0.f32 = sign_out ? -UNDERFLOW_F32 : UNDERFLOW_F32
-  # elsif exponent(S1.f32) == 255 then
-  # D0.f32 = sign_out ? -OVERFLOW_F32 : OVERFLOW_F32
-  # else
-  # D0.f32 = sign_out ? -abs(S0.f32) : abs(S0.f32)
-  # endif
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_DIV_FIXUP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   sign_out = (sign(S1.f32) ^ sign(S2.f32))
   if isNAN(F(S2.f32)):
     D0.f32 = F(cvtToQuietNAN(F(S2.f32)))
@@ -10772,40 +3841,9 @@ def _VOP3Op_V_DIV_FIXUP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
     D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32))
   else:
     D0.f32 = ((-OVERFLOW_F32) if (sign_out) else (OVERFLOW_F32)) if isNAN(S0.f32) else ((-abs(S0.f32)) if (sign_out) else (abs(S0.f32)))
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # sign_out = (sign(S1.f64) ^ sign(S2.f64));
-  # if isNAN(S2.f64) then
-  # D0.f64 = cvtToQuietNAN(S2.f64)
-  # elsif isNAN(S1.f64) then
-  # D0.f64 = cvtToQuietNAN(S1.f64)
-  # elsif ((S1.f64 == 0.0) && (S2.f64 == 0.0)) then
-  # // 0/0
-  # D0.f64 = 64'F(0xfff8000000000000LL)
-  # elsif ((abs(S1.f64) == +INF) && (abs(S2.f64) == +INF)) then
-  # // inf/inf
-  # D0.f64 = 64'F(0xfff8000000000000LL)
-  # elsif ((S1.f64 == 0.0) || (abs(S2.f64) == +INF)) then
-  # // x/0, or inf/y
-  # D0.f64 = sign_out ? -INF : +INF
-  # elsif ((abs(S1.f64) == +INF) || (S2.f64 == 0.0)) then
-  # // x/inf, 0/y
-  # D0.f64 = sign_out ? -0.0 : 0.0
-  # elsif exponent(S2.f64) - exponent(S1.f64) < -1075 then
-  # D0.f64 = sign_out ? -UNDERFLOW_F64 : UNDERFLOW_F64
-  # elsif exponent(S1.f64) == 2047 then
-  # D0.f64 = sign_out ? -OVERFLOW_F64 : OVERFLOW_F64
-  # else
-  # D0.f64 = sign_out ? -abs(S0.f64) : abs(S0.f64)
-  # endif
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_DIV_FIXUP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   sign_out = (sign(S1.f64) ^ sign(S2.f64))
   if isNAN(S2.f64):
     D0.f64 = cvtToQuietNAN(S2.f64)
@@ -10825,122 +3863,41 @@ def _VOP3Op_V_DIV_FIXUP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal,
     D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64))
   else:
     D0.f64 = ((-OVERFLOW_F64) if (sign_out) else (OVERFLOW_F64)) if isNAN(S0.f64) else ((-abs(S0.f64)) if (sign_out) else (abs(S0.f64)))
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  result['d0_64'] = True
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MIN3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MIN3_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f32 = v_min_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MAX3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MAX3_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f32 = v_max_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MIN3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MIN3_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f16 = v_min_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MAX3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MAX3_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f16 = v_max_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MINIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MINIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f32 = v_minimum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MAXIMUM3_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f32 = v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MAXIMUM3_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f32 = v_maximum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MINIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MINIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f16 = v_minimum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MAXIMUM3_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16)
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MAXIMUM3_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   D0.f16 = v_maximum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # if (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32)) || isNAN(64'F(S2.f32))) then
-  # D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32)
-  # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32 then
-  # D0.f32 = v_max_num_f32(S1.f32, S2.f32)
-  # elsif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S1.f32 then
-  # D0.f32 = v_max_num_f32(S0.f32, S2.f32)
-  # else
-  # D0.f32 = v_max_num_f32(S0.f32, S1.f32)
-  # endif
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
-  # --- compiled pseudocode ---
+def _VOP3Op_V_MED3_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   if (isNAN(F(S0.f32)) or isNAN(F(S1.f32)) or isNAN(F(S2.f32))):
     D0.f32 = v_min3_num_f32(S0.f32, S1.f32, S2.f32)
   elif v_max3_num_f32(S0.f32, S1.f32, S2.f32) == S0.f32:
@@ -10949,25 +3906,9 @@ def _VOP3Op_V_MED3_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V
     D0.f32 = v_max_num_f32(S0.f32, S2.f32)
   else:
     D0.f32 = v_max_num_f32(S0.f32, S1.f32)
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}

-def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # if (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16)) || isNAN(64'F(S2.f16))) then
-  # D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16)
-  # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16 then
-  # D0.f16 = v_max_num_f16(S1.f16, S2.f16)
-  # elsif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S1.f16 then
-  # D0.f16 = v_max_num_f16(S0.f16, S2.f16)
-  # else
-  # D0.f16 = v_max_num_f16(S0.f16, S1.f16)
-  # endif
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = 
Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isNAN(F(S0.f16)) or isNAN(F(S1.f16)) or isNAN(F(S2.f16))): D0.f16 = v_min3_num_f16(S0.f16, S1.f16, S2.f16) elif v_max3_num_f16(S0.f16, S1.f16, S2.f16) == S0.f16: @@ -10976,89 +3917,32 @@ def _VOP3Op_V_MED3_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V D0.f16 = v_max_num_f16(S0.f16, S2.f16) else: D0.f16 = v_max_num_f16(S0.f16, S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f32 = 2.0F ** 32 * fma(S0.f32, S1.f32, S2.f32) - # else - # D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f32 = (2.0 ** 64 if exponent(S2.f32) > 127 else 2.0 ** -64) * fma(S0.f32, S1.f32, S2.f32) else: D0.f32 = fma(S0.f32, S1.f32, S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FMAS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if VCC.u64[laneId] then - # D0.f64 = 2.0 ** 64 * fma(S0.f64, S1.f64, S2.f64) - # else - # D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FMAS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if VCC.u64[laneId]: D0.f64 = (2.0 ** 128 if exponent(S2.f64) > 1023 else 2.0 ** -128) * fma(S0.f64, S1.f64, S2.f64) else: D0.f64 = fma(S0.f64, S1.f64, S2.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MSAD_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # // UNSIGNED comparison - # tmp = S2.u32; - # tmp += S1.u32[7 : 0] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])); - # tmp += S1.u32[15 : 8] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])); - # tmp += S1.u32[23 : 16] == 8'0U ? 0U : 32'U(ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])); - # tmp += S1.u32[31 : 24] == 8'0U ? 
0U : 32'U(ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_MSAD_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += ((0) if (S1.u32[7 : 0] == 0) else ((ABSDIFF(S0.u32[7 : 0], S1.u32[7 : 0])))) tmp += ((0) if (S1.u32[15 : 8] == 0) else ((ABSDIFF(S0.u32[15 : 8], S1.u32[15 : 8])))) tmp += ((0) if (S1.u32[23 : 16] == 0) else ((ABSDIFF(S0.u32[23 : 16], S1.u32[23 : 16])))) tmp += ((0) if (S1.u32[31 : 24] == 0) else ((ABSDIFF(S0.u32[31 : 24], S1.u32[31 : 24])))) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_sad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_QSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_sad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11066,21 +3950,9 @@ def _VOP3Op_V_QSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, tmp[31 : 16] = (v_sad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_sad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[63 : 48] = 16'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)); - # tmp[47 : 32] = 16'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[47 : 32].u32)); - # tmp[31 : 16] = 16'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)); - # tmp[15 : 0] = 16'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)); - # D0.b64 = tmp.b64 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3Op_V_MQSAD_PK_U16_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[63 : 48] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[63 : 48].u32)) @@ -11088,21 +3960,9 @@ def _VOP3Op_V_MQSAD_PK_U16_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal tmp[31 : 16] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[31 : 16].u32)) tmp[15 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[15 : 0].u32)) D0.b64 = tmp.b64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[127 : 96] = 32'B(v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)); - # tmp[95 : 64] = 32'B(v_msad_u8(S0[47 : 16], S1[31 : 0], S2[95 : 64].u32)); - # tmp[63 : 32] = 32'B(v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)); - # tmp[31 : 0] = 32'B(v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)); - # D0.b128 = tmp.b128 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = 
Reg(d0) +def _VOP3Op_V_MQSAD_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[127 : 96] = (v_msad_u8(S0[55 : 24], S1[31 : 0], S2[127 : 96].u32)) @@ -11110,232 +3970,78 @@ def _VOP3Op_V_MQSAD_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp[63 : 32] = (v_msad_u8(S0[39 : 8], S1[31 : 0], S2[63 : 32].u32)) tmp[31 : 0] = (v_msad_u8(S0[31 : 0], S1[31 : 0], S2[31 : 0].u32)) D0.b128 = tmp.b128 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32 ^ S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 + S2.u16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 + S2.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PERM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 24] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[31 : 24]); - # D0[23 : 16] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[23 : 16]); - # D0[15 : 8] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[15 : 8]); - # D0[7 : 0] = BYTE_PERMUTE({ S0.u32, S1.u32 }, S2.u32[7 : 0]) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PERM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 24] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[31 : 24]) D0[23 : 16] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[23 : 16]) D0[15 : 8] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[15 : 8]) D0[7 : 0] = BYTE_PERMUTE(_pack32(S0.u32, S1.u32), S2.u32[7 : 0]) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XAD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XAD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 ^ S1.u32) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_ADD_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_ADD_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 << S1.u32[4 : 0].u32) + S2.u32 - # --- end pseudocode --- - 
result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_LSHL_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_LSHL_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 + S1.u32) << S2.u32[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = fma(S0.f16, S1.f16, S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = fma(S0.f16, S1.f16, S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_min_i16(v_min_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_min_u16(v_min_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = v_max_i16(v_max_i16(S0.i16, S1.i16), S2.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = v_max_u16(v_max_u16(S0.u16, S1.u16), S2.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16 then - # D0.i16 = v_max_i16(S1.i16, S2.i16) - # elsif v_max3_i16(S0.i16, S1.i16, 
S2.i16) == S1.i16 then - # D0.i16 = v_max_i16(S0.i16, S2.i16) - # else - # D0.i16 = v_max_i16(S0.i16, S1.i16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_i16(S0.i16, S1.i16, S2.i16) == S0.i16: D0.i16 = v_max_i16(S1.i16, S2.i16) elif v_max3_i16(S0.i16, S1.i16, S2.i16) == S1.i16: D0.i16 = v_max_i16(S0.i16, S2.i16) else: D0.i16 = v_max_i16(S0.i16, S1.i16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MED3_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16 then - # D0.u16 = v_max_u16(S1.u16, S2.u16) - # elsif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16 then - # D0.u16 = v_max_u16(S0.u16, S2.u16) - # else - # D0.u16 = v_max_u16(S0.u16, S1.u16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MED3_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if v_max3_u16(S0.u16, S1.u16, S2.u16) == S0.u16: D0.u16 = v_max_u16(S1.u16, S2.u16) elif v_max3_u16(S0.u16, S1.u16, S2.u16) == S1.u16: D0.u16 = v_max_u16(S0.u16, S2.u16) else: D0.u16 = v_max_u16(S0.u16, S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 * S1.i16 + S2.i16 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 * S1.i16 + S2.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # sign_out = (sign(S1.f16) ^ sign(S2.f16)); - # if isNAN(64'F(S2.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S2.f16))) - # elsif isNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif ((64'F(S1.f16) == 0.0) && (64'F(S2.f16) == 0.0)) then - # // 0/0 - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(abs(S1.f16)) == +INF) && (64'F(abs(S2.f16)) == +INF)) then - # // inf/inf - # D0.f16 = 16'F(0xfe00) - # elsif ((64'F(S1.f16) == 0.0) || (64'F(abs(S2.f16)) == +INF)) then - # // x/0, or inf/y - # D0.f16 = sign_out ? -INF.f16 : +INF.f16 - # elsif ((64'F(abs(S1.f16)) == +INF) || (64'F(S2.f16) == 0.0)) then - # // x/inf, 0/y - # D0.f16 = sign_out ? -16'0.0 : 16'0.0 - # else - # D0.f16 = sign_out ? 
-abs(S0.f16) : abs(S0.f16) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_DIV_FIXUP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): sign_out = (sign(S1.f16) ^ sign(S2.f16)) if isNAN(F(S2.f16)): D0.f16 = F(cvtToQuietNAN(F(S2.f16))) @@ -11351,739 +4057,280 @@ def _VOP3Op_V_DIV_FIXUP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, D0.f16 = ((-0.0) if (sign_out) else (0.0)) else: D0.f16 = ((-abs(S0.f16)) if (sign_out) else (abs(S0.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD3_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 + S1.u32 + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD3_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 + S1.u32 + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHL_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHL_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 << S1.u32[4 : 0].u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_OR_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_OR_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = ((S0.u32 & S1.u32) | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR3_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (S0.u32 | S1.u32 | S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR3_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u32 | S1.u32 | S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_U32_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U(S0.u16) * 32'U(S1.u16) + S2.u32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_U32_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (S0.u16) * (S1.u16) + S2.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAD_I32_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I(S0.i16) * 32'I(S1.i16) + S2.i32 - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAD_I32_I16(S0, 
S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (S0.i16) * (S1.i16) + S2.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CNDMASK_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = VCC.u64[laneId] ? S1.u16 : S0.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - # --- compiled pseudocode --- +def _VOP3Op_V_CNDMASK_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S1.u16) if (VCC.u64[laneId]) else (S0.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_min_u32(v_max_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = v_max_u32(v_min_u32(S0.u32, S1.u32), S2.u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_min_i32(v_max_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = v_max_i32(v_min_i32(S0.i32, S1.i32), S2.i32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_F16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f16; - # tmp += S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp += S0[31 : 16].f16 * S1[31 : 16].f16; - # D0.f16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_F16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f16) tmp += S0[15 : 0].f16 * 
S1[15 : 0].f16 tmp += S0[31 : 16].f16 * S1[31 : 16].f16 D0.f16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_DOT2_BF16_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.bf16; - # tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16; - # tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16; - # D0.bf16 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_DOT2_BF16_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.bf16) tmp += S0[15 : 0].bf16 * S1[15 : 0].bf16 tmp += S0[31 : 16].bf16 * S1[31 : 16].bf16 D0.bf16 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_max_num_f32(v_min_num_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_NUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_NUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_min_num_f32(v_max_num_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINMAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINMAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_max_num_f16(v_min_num_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXMIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXMIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_min_num_f16(v_max_num_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUMMAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUMMAXIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, 
vdst_idx=0, PC=None): D0.f32 = v_maximum_f32(v_minimum_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUMMINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUMMINIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = v_minimum_f32(v_maximum_f32(S0.f32, S1.f32), S2.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUMMAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUMMAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_maximum_f16(v_minimum_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUMMINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUMMINIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = v_minimum_f16(v_maximum_f16(S0.f16, S1.f16), S2.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_EXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = pow(2.0F, S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_EXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = pow(2.0, S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_EXP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = pow(16'2.0, S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_EXP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = pow(2.0, S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_LOG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = log2(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_LOG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = log2(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_LOG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = log2(S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- 
compiled pseudocode --- +def _VOP3Op_V_S_LOG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = log2(S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RCP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / S0.f32 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RCP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / S0.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RCP_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / S0.f16; - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RCP_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / S0.f16 D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RSQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = 1.0F / sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RSQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = 1.0 / sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_RSQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = 16'1.0 / sqrt(S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_RSQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = 1.0 / sqrt(S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_SQRT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = sqrt(S0.f32) - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_SQRT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = sqrt(S0.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_S_SQRT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f16 = sqrt(S0.f16); - # D0[31 : 16] = 16'0x0 - S0 = Reg(s0) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_S_SQRT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f16 = sqrt(S0.f16) D0[31 : 16] = 0x0 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 + S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 + S1.u16 - # --- end pseudocode --- - 
result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 - S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 - S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 * S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = S0.u16 * S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_i16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_i16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_i16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_i16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[31 : 16] = 16'B(v_cvt_u16_f32(S1.f32)); - # tmp[15 : 0] = 16'B(v_cvt_u16_f32(S0.f32)); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16] = (v_cvt_u16_f32(S1.f32)) tmp[15 : 0] = (v_cvt_u16_f32(S0.f32)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 >= S1.u16 ? S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 >= S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 >= S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 >= S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = S0.u16 < S1.u16 ? 
S0.u16 : S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = ((S0.u16) if (S0.u16 < S1.u16) else (S1.u16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 < S1.i16 ? S0.i16 : S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = ((S0.i16) if (S0.i16 < S1.i16) else (S1.i16)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 + S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 + S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_SUB_NC_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = S0.i16 - S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = S0.i16 - S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_PACK_B32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0[31 : 16].f16 = S1.f16; - # D0[15 : 0].f16 = S0.f16 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_PACK_B32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0[31 : 16].f16 = S1.f16 D0[15 : 0].f16 = S0.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f16_to_snorm(S0.f16); - # tmp[31 : 16].i16 = f16_to_snorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f16_to_snorm(S0.f16) tmp[31 : 16].i16 = f16_to_snorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f16_to_unorm(S0.f16); - # tmp[31 : 16].u16 = f16_to_unorm(S1.f16); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f16_to_unorm(S0.f16) tmp[31 : 16].u16 = f16_to_unorm(S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc 
& 1} - return result + return {} -def _VOP3Op_V_LDEXP_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f32 = S0.f32 * 2.0F ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f32 = S0.f32 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BFM_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = (((1U << S0[4 : 0].u32) - 1U) << S1[4 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_BFM_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((1 << S0[4 : 0].u32) - 1) << S1[4 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_BCNT_U32_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32; - # for i in 0 : 31 do - # tmp += S0[i].u32; - # // count i'th bit - # endfor; - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3Op_V_BCNT_U32_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32) for i in range(0, int(31)+1): tmp += S0[i].u32 D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_CVT_PK_NORM_I16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = f32_to_snorm(S0.f32); - # tmp[31 : 16].i16 = f32_to_snorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_I16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = f32_to_snorm(S0.f32) tmp[31 : 16].i16 = f32_to_snorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_NORM_U16_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = f32_to_unorm(S0.f32); - # tmp[31 : 16].u16 = f32_to_unorm(S1.f32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_NORM_U16_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = f32_to_unorm(S0.f32) tmp[31 : 16].u16 = f32_to_unorm(S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_U16_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = u32_to_u16(S0.u32); - # tmp[31 : 16].u16 = u32_to_u16(S1.u32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_U16_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = u32_to_u16(S0.u32) tmp[31 : 16].u16 = u32_to_u16(S1.u32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_CVT_PK_I16_I32(s0, s1, s2, d0, scc, 
vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = i32_to_i16(S0.i32); - # tmp[31 : 16].i16 = i32_to_i16(S1.i32); - S0 = Reg(s0) - S1 = Reg(s1) +def _VOP3Op_V_CVT_PK_I16_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = i32_to_i16(S0.i32) tmp[31 : 16].i16 = i32_to_i16(S1.i32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - return result + return {} -def _VOP3Op_V_SUB_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 - S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_SUB_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 - S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ADD_NC_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = S0.i32 + S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ADD_NC_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = S0.i32 + S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LDEXP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.f64 = S0.f64 * 2.0 ** S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LDEXP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.f64 = S0.f64 * 2.0 ** S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MUL_LO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = S0.u32 * S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_LO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = S0.u32 * S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u32 = 32'U((64'U(S0.u32) * 64'U(S1.u32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u32 = (((S0.u32) * (S1.u32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MUL_HI_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i32 = 32'I((64'I(S0.i32) * 64'I(S1.i32)) >> 32U) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MUL_HI_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i32 = (((S0.i32) * (S1.i32)) >> 32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, 
scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # shift = 32'I(S1[4 : 0].u32) * 53; - # if exponent(S0.f64) > 1077 then - # shift += exponent(S0.f64) - 1077 - # endif; - # // (2.0/PI) == 0.{b_1200, b_1199, b_1198, ..., b_1, b_0} - # // b_1200 is the MSB of the fractional part of 2.0/PI - # // Left shift operation indicates which bits are brought - # result = 64'F((1201'B(2.0 / PI)[1200 : 0] << shift.u32) & 1201'0x1fffffffffffff); - # scale = -53 - shift; - # if exponent(S0.f64) >= 1968 then - # scale += 128 - # endif; - # D0.f64 = ldexp(result, scale) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_TRIG_PREOP_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): shift = (S1[4 : 0].u32) * 53 if exponent(S0.f64) > 1077: shift += exponent(S0.f64) - 1077 @@ -12092,91 +4339,29 @@ def _VOP3Op_V_TRIG_PREOP_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, if exponent(S0.f64) >= 1968: scale += 128 D0.f64 = ldexp(result, scale) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 << S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 << S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S1.u16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S1.u16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i16 = (S1.i16 >> S0[3 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.i16 = (S1.i16 >> S0[3 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_LSHRREV_B64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64 = (S1.u64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_LSHRREV_B64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64 = (S1.u64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_ASHRREV_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.i64 = (S1.i64 >> S0[5 : 0].u32) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_ASHRREV_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): 
D0.i64 = (S1.i64 >> S0[5 : 0].u32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S1.f64 - # elsif ((S0.f64 < S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && sign(S0.f64) && - # !sign(S1.f64))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if isSignalNAN(S0.f64): @@ -12191,34 +4376,9 @@ def _VOP3Op_V_MINIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(S0.f64) || isSignalNAN(S1.f64)) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(S0.f64) then - # D0.f64 = cvtToQuietNAN(S0.f64) - # elsif isSignalNAN(S1.f64) then - # D0.f64 = cvtToQuietNAN(S1.f64) - # elsif isQuietNAN(S0.f64) then - # D0.f64 = S0.f64 - # elsif isQuietNAN(S1.f64) then - # D0.f64 = S1.f64 - # elsif ((S0.f64 > S1.f64) || ((abs(S0.f64) == 0.0) && (abs(S1.f64) == 0.0) && !sign(S0.f64) && - # sign(S1.f64))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f64 = S0.f64 - # else - # D0.f64 = S1.f64 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(S0.f64) or isSignalNAN(S1.f64)): TRAPSTS.INVALID = 1 if isSignalNAN(S0.f64): @@ -12233,23 +4393,9 @@ def _VOP3Op_V_MAXIMUM_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f64 = S0.f64 else: D0.f64 = S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - return result + return {'D0': D0} -def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare lane : 32'U; - # if WAVE32 then - # lane = S1.u32[4 : 0].u32; - # // Lane select for wave32 - # else - # lane = S1.u32[5 : 0].u32; - # // Lane select for wave64 - # endif; - # D0.b32 = VGPR[lane][SRC0.u32] - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3Op_V_READLANE_B32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): SRC0 = Reg(src0_idx) # --- compiled pseudocode --- if WAVE32: @@ -12257,66 +4403,21 @@ def _VOP3Op_V_READLANE_B32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V else: lane = S1.u32[5 : 0].u32 D0.b32 = VGPR[lane][SRC0.u32] - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_AND_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, 
literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 & S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_AND_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 & S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_OR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 | S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_OR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 | S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_XOR_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u16 = (S0.u16 ^ S1.u16) - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_XOR_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u16 = (S0.u16 ^ S1.u16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 < S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && sign(S0.f32) && - # !sign(S1.f32))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -12331,33 +4432,9 @@ def _VOP3Op_V_MINIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f32)) || isSignalNAN(64'F(S1.f32))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S0.f32))) - # elsif isSignalNAN(64'F(S1.f32)) then - # D0.f32 = 32'F(cvtToQuietNAN(64'F(S1.f32))) - # elsif isQuietNAN(64'F(S0.f32)) then - # D0.f32 = S0.f32 - # elsif isQuietNAN(64'F(S1.f32)) then - # D0.f32 = S1.f32 - # elsif ((S0.f32 > S1.f32) || ((abs(S0.f32) == 0.0F) && (abs(S1.f32) == 0.0F) && !sign(S0.f32) && - # sign(S1.f32))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f32 = S0.f32 - # else - # D0.f32 = S1.f32 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, 
src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f32)) or isSignalNAN(F(S1.f32))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f32)): @@ -12372,33 +4449,9 @@ def _VOP3Op_V_MAXIMUM_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f32 = S0.f32 else: D0.f32 = S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 < S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && sign(S0.f16) && - # !sign(S1.f16))) then - # // NOTE: -0<+0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MINIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -12413,33 +4466,9 @@ def _VOP3Op_V_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # if (isSignalNAN(64'F(S0.f16)) || isSignalNAN(64'F(S1.f16))) then - # TRAPSTS.INVALID = 1 - # endif; - # if isSignalNAN(64'F(S0.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S0.f16))) - # elsif isSignalNAN(64'F(S1.f16)) then - # D0.f16 = 16'F(cvtToQuietNAN(64'F(S1.f16))) - # elsif isQuietNAN(64'F(S0.f16)) then - # D0.f16 = S0.f16 - # elsif isQuietNAN(64'F(S1.f16)) then - # D0.f16 = S1.f16 - # elsif ((S0.f16 > S1.f16) || ((abs(S0.f16) == 16'0.0) && (abs(S1.f16) == 16'0.0) && !sign(S0.f16) && - # sign(S1.f16))) then - # // NOTE: +0>-0 is TRUE in this comparison - # D0.f16 = S0.f16 - # else - # D0.f16 = S1.f16 - # endif - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - # --- compiled pseudocode --- +def _VOP3Op_V_MAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if (isSignalNAN(F(S0.f16)) or isSignalNAN(F(S1.f16))): TRAPSTS.INVALID = 1 if isSignalNAN(F(S0.f16)): @@ -12454,9 +4483,7 @@ def _VOP3Op_V_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VG D0.f16 = S0.f16 else: D0.f16 = S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3Op_FUNCTIONS = { VOP3Op.V_CMP_LT_F16: _VOP3Op_V_CMP_LT_F16, @@ -12878,102 +4905,26 @@ VOP3Op_FUNCTIONS = { VOP3Op.V_MAXIMUM_F16: _VOP3Op_V_MAXIMUM_F16, } -def _VOP3SDOp_V_ADD_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32) + VCC.u64[laneId].u64; - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32) + VCC.u64[laneId]) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S1.u32) + VCC.u64[laneId].u64 > 64'U(S0.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUB_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S0.u32 - S1.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S1.u32) + VCC.u64[laneId] > (S0.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUBREV_CO_CI_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S1.u32 - S0.u32 - VCC.u64[laneId].u32; - # VCC.u64[laneId] = 64'U(S0.u32) + VCC.u64[laneId].u64 > 64'U(S1.u32) ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
- # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_SUBREV_CO_CI_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S1.u32 - S0.u32 - VCC.u64[laneId]) VCC.u64[laneId] = ((1) if ((S0.u32) + VCC.u64[laneId] > (S1.u32)) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((64'F(S2.f32) == 0.0) || (64'F(S1.f32) == 0.0)) then - # D0.f32 = NAN.f32 - # elsif exponent(S2.f32) - exponent(S1.f32) >= 96 then - # // N/D near MAX_FLOAT_F32 - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif S1.f32 == DENORM.f32 then - # D0.f32 = ldexp(S0.f32, 64) - # elsif ((1.0 / 64'F(S1.f32) == DENORM.f64) && (S2.f32 / S1.f32 == DENORM.f32)) then - # VCC = 0x1LL; - # if S0.f32 == S1.f32 then - # // Only scale the denominator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif 1.0 / 64'F(S1.f32) == DENORM.f64 then - # D0.f32 = ldexp(S0.f32, -64) - # elsif S2.f32 / S1.f32 == DENORM.f32 then - # VCC = 0x1LL; - # if S0.f32 == S2.f32 then - # // Only scale the numerator - # D0.f32 = ldexp(S0.f32, 64) - # endif - # elsif exponent(S2.f32) <= 23 then - # // Numerator is tiny - # D0.f32 = ldexp(S0.f32, 64) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) # --- compiled pseudocode --- VCC = Reg(0x0) if ((F(S2.f32) == 0.0) or (F(S1.f32) == 0.0)): @@ -12996,47 +4947,10 @@ def _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal VCC = Reg(0x1); D0.f32 = ldexp(S0.f32, 64) if S1.f32 == DENORM.f32: D0.f32 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC = 0x0LL; - # if ((S2.f64 == 0.0) || (S1.f64 == 0.0)) then - # D0.f64 = NAN.f64 - # elsif exponent(S2.f64) - exponent(S1.f64) >= 768 then - # // N/D near MAX_FLOAT_F64 - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, 128) - # elsif ((1.0 / S1.f64 == DENORM.f64) && (S2.f64 / S1.f64 == DENORM.f64)) then - # VCC = 0x1LL; - # if S0.f64 == S1.f64 then - # // Only scale the denominator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif 1.0 / S1.f64 == DENORM.f64 then - # D0.f64 = ldexp(S0.f64, -128) - # elsif S2.f64 / S1.f64 == DENORM.f64 then - # VCC = 0x1LL; - # if S0.f64 == S2.f64 then - # // Only scale the numerator - # D0.f64 = ldexp(S0.f64, 128) - # endif - # elsif exponent(S2.f64) <= 53 then - # // Numerator is tiny - # D0.f64 = ldexp(S0.f64, 128) - # endif - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(s0) - VCC = Reg(vcc) +def _VOP3SDOp_V_DIV_SCALE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): + D0 = Reg(S0._val) 
# --- compiled pseudocode --- VCC = Reg(0x0) if ((S2.f64 == 0.0) or (S1.f64 == 0.0)): @@ -13059,105 +4973,41 @@ def _VOP3SDOp_V_DIV_SCALE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal D0.f64 = ldexp(S0.f64, 128) if S1.f64 == DENORM.f64: D0.f64 = float("nan") - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - result['d0_64'] = True - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_MAD_CO_U64_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.u1, D0.u64 } = 65'B(65'U(S0.u32) * 65'U(S1.u32) + 65'U(S2.u64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_CO_U64_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.u32) * (S1.u32) + (S2.u64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_MAD_CO_I64_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # { D1.i1, D0.i64 } = 65'B(65'I(S0.i32) * 65'I(S1.i32) + 65'I(S2.i64)) - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3SDOp_V_MAD_CO_I64_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D1 = Reg(0) # --- compiled pseudocode --- _full = ((S0.i32) * (S1.i32) + (S2.i64)) D0.u64 = int(_full) & 0xffffffffffffffff D1 = Reg((int(_full) >> 64) & 1) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['d0_64'] = True - result['d1'] = D1._val & 1 - return result + return {'D0': D0, 'D1': D1} -def _VOP3SDOp_V_ADD_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = 64'U(S0.u32) + 64'U(S1.u32); - # VCC.u64[laneId] = tmp >= 0x100000000ULL ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_ADD_CO_CI_U32. - # D0.u32 = tmp.u32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - tmp = Reg(0) - laneId = lane - # --- compiled pseudocode --- +def _VOP3SDOp_V_ADD_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg((S0.u32) + (S1.u32)) VCC.u64[laneId] = ((1) if (tmp >= 0x100000000) else (0)) D0.u32 = tmp.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - result['vcc_lane'] = (VCC._val >> lane) & 1 - return result + return {'D0': D0, 'VCC': VCC} -def _VOP3SDOp_V_SUB_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S0.u32 - S1.u32; - # VCC.u64[laneId] = S1.u32 > S0.u32 ? 1'1U : 1'0U; - # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32. 
-  # D0.u32 = tmp.u32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  tmp = Reg(0)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3SDOp_V_SUB_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S0.u32 - S1.u32)
   VCC.u64[laneId] = ((1) if (S1.u32 > S0.u32) else (0))
   D0.u32 = tmp.u32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  result['vcc_lane'] = (VCC._val >> lane) & 1
-  return result
+  return {'D0': D0, 'VCC': VCC}
 
-def _VOP3SDOp_V_SUBREV_CO_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp = S1.u32 - S0.u32;
-  # VCC.u64[laneId] = S0.u32 > S1.u32 ? 1'1U : 1'0U;
-  # // VCC is an UNSIGNED overflow/carry-out for V_SUB_CO_CI_U32.
-  # D0.u32 = tmp.u32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
-  VCC = Reg(vcc)
-  tmp = Reg(0)
-  laneId = lane
-  # --- compiled pseudocode ---
+def _VOP3SDOp_V_SUBREV_CO_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(S1.u32 - S0.u32)
   VCC.u64[laneId] = ((1) if (S0.u32 > S1.u32) else (0))
   D0.u32 = tmp.u32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  result['vcc_lane'] = (VCC._val >> lane) & 1
-  return result
+  return {'D0': D0, 'VCC': VCC}
 
 VOP3SDOp_FUNCTIONS = {
   VOP3SDOp.V_ADD_CO_CI_U32: _VOP3SDOp_V_ADD_CO_CI_U32,
@@ -13172,353 +5022,159 @@ VOP3SDOp_FUNCTIONS = {
   VOP3SDOp.V_SUBREV_CO_U32: _VOP3SDOp_V_SUBREV_CO_U32,
 }
 
-def _VOP3POp_V_PK_MAD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16;
-  # tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  S2 = Reg(s2)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MAD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].i16 = S0[15 : 0].i16 * S1[15 : 0].i16 + S2[15 : 0].i16
   tmp[31 : 16].i16 = S0[31 : 16].i16 * S1[31 : 16].i16 + S2[31 : 16].i16
   D0.b32 = tmp
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_MUL_LO_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16;
-  # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16;
-  # D0.b32 = tmp.b32
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_MUL_LO_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16
   tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16
   D0.b32 = tmp.b32
-  # --- end pseudocode ---
-  result = {'d0': D0._val, 'scc': scc & 1}
-  return result
+  return {'D0': D0}
 
-def _VOP3POp_V_PK_ADD_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):
-  # declare tmp : 32'B;
-  # tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16;
-  # tmp[31 : 16].i16 = S0[31 : 16].i16 + S1[31 : 16].i16;
-  # D0.b32 = tmp
-  S0 = Reg(s0)
-  S1 = Reg(s1)
-  D0 = Reg(d0)
+def _VOP3POp_V_PK_ADD_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):
   tmp = Reg(0)
   # --- compiled pseudocode ---
   tmp[15 : 0].i16 = S0[15 : 0].i16 + S1[15 : 0].i16
   tmp[31 : 16].i16 = S0[31 : 16].i16
+ S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = S0[15 : 0].i16 - S1[15 : 0].i16 tmp[31 : 16].i16 = S0[31 : 16].i16 - S1[31 : 16].i16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHLREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHLREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 << S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 << S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_LSHRREV_B16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_LSHRREV_B16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].u16 = (S1[31 : 16].u16 >> S0.u32[19 : 16].u32) tmp[15 : 0].u16 = (S1[15 : 0].u16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ASHRREV_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32); - # tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32); - # D0.b32 = tmp.b32 - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ASHRREV_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[31 : 16].i16 = (S1[31 : 16].i16 >> S0.u32[19 : 16].u32) tmp[15 : 0].i16 = (S1[15 : 0].i16 >> S0.u32[3 : 0].u32) D0.b32 = tmp.b32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 >= S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 >= S1[31 : 16].i16 ? 
S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 >= S1[15 : 0].i16) else (S1[15 : 0].i16)) tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 >= S1[31 : 16].i16) else (S1[31 : 16].i16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].i16 = S0[15 : 0].i16 < S1[15 : 0].i16 ? S0[15 : 0].i16 : S1[15 : 0].i16; - # tmp[31 : 16].i16 = S0[31 : 16].i16 < S1[31 : 16].i16 ? S0[31 : 16].i16 : S1[31 : 16].i16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].i16 = ((S0[15 : 0].i16) if (S0[15 : 0].i16 < S1[15 : 0].i16) else (S1[15 : 0].i16)) tmp[31 : 16].i16 = ((S0[31 : 16].i16) if (S0[31 : 16].i16 < S1[31 : 16].i16) else (S1[31 : 16].i16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 * S1[15 : 0].u16 + S2[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 * S1[31 : 16].u16 + S2[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 + S1[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 + S1[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_SUB_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_SUB_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = S0[15 : 0].u16 - S1[15 : 0].u16 tmp[31 : 16].u16 = S0[31 : 16].u16 - S1[31 : 16].u16 D0.b32 = tmp - # --- end pseudocode 
--- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 >= S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 >= S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 >= S1[15 : 0].u16) else (S1[15 : 0].u16)) tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 >= S1[31 : 16].u16) else (S1[31 : 16].u16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].u16 = S0[15 : 0].u16 < S1[15 : 0].u16 ? S0[15 : 0].u16 : S1[15 : 0].u16; - # tmp[31 : 16].u16 = S0[31 : 16].u16 < S1[31 : 16].u16 ? S0[31 : 16].u16 : S1[31 : 16].u16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].u16 = ((S0[15 : 0].u16) if (S0[15 : 0].u16 < S1[15 : 0].u16) else (S1[15 : 0].u16)) tmp[31 : 16].u16 = ((S0[31 : 16].u16) if (S0[31 : 16].u16 < S1[31 : 16].u16) else (S1[31 : 16].u16)) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_FMA_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16); - # tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) +def _VOP3POp_V_PK_FMA_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = fma(S0[15 : 0].f16, S1[15 : 0].f16, S2[15 : 0].f16) tmp[31 : 16].f16 = fma(S0[31 : 16].f16, S1[31 : 16].f16, S2[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_ADD_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_ADD_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = S0[15 : 0].f16 + S1[15 : 0].f16 tmp[31 : 16].f16 = S0[31 : 16].f16 + S1[31 : 16].f16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MUL_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16; - # tmp[31 : 16].f16 = S0[31 
: 16].f16 * S1[31 : 16].f16; - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MUL_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = S0[15 : 0].f16 * S1[15 : 0].f16 tmp[31 : 16].f16 = S0[31 : 16].f16 * S1[31 : 16].f16 D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16); - # tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += f16_to_f32(S0[15 : 0].f16) * f16_to_f32(S1[15 : 0].f16) tmp += f16_to_f32(S0[31 : 16].f16) * f16_to_f32(S1[31 : 16].f16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_U32_U8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8); - # tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8); - # tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8); - # tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_U32_U8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u8_to_u32(S0[7 : 0].u8) * u8_to_u32(S1[7 : 0].u8) tmp += u8_to_u32(S0[15 : 8].u8) * u8_to_u32(S1[15 : 8].u8) tmp += u8_to_u32(S0[23 : 16].u8) * u8_to_u32(S1[23 : 16].u8) tmp += u8_to_u32(S0[31 : 24].u8) * u8_to_u32(S1[31 : 24].u8) D0.u32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.u32; - # tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4); - # tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4); - # tmp += u4_to_u32(S0[11 : 8].u4) * u4_to_u32(S1[11 : 8].u4); - # tmp += u4_to_u32(S0[15 : 12].u4) * u4_to_u32(S1[15 : 12].u4); - # tmp += u4_to_u32(S0[19 : 16].u4) * u4_to_u32(S1[19 : 16].u4); - # tmp += u4_to_u32(S0[23 : 20].u4) * u4_to_u32(S1[23 : 20].u4); - # tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4); - # tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4); - # D0.u32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT8_U32_U4(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.u32) tmp += u4_to_u32(S0[3 : 0].u4) * u4_to_u32(S1[3 : 0].u4) tmp += u4_to_u32(S0[7 : 4].u4) * u4_to_u32(S1[7 : 4].u4) @@ -13529,188 +5185,82 @@ def _VOP3POp_V_DOT8_U32_U4(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, V tmp += u4_to_u32(S0[27 : 24].u4) * u4_to_u32(S1[27 : 24].u4) tmp += u4_to_u32(S0[31 : 28].u4) * u4_to_u32(S1[31 : 28].u4) D0.u32 = tmp - # 
--- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT2_F32_BF16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16); - # tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT2_F32_BF16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += bf16_to_f32(S0[15 : 0].bf16) * bf16_to_f32(S1[15 : 0].bf16) tmp += bf16_to_f32(S0[31 : 16].bf16) * bf16_to_f32(S1[31 : 16].bf16) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MIN_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MIN_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_min_num_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_min_num_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAX_NUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MAX_NUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_max_num_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_max_num_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MINIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) +def _VOP3POp_V_PK_MINIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_minimum_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_minimum_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_PK_MAXIMUM_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # declare tmp : 32'B; - # tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16); - # tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16); - # D0.b32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - D0 = 
Reg(d0) +def _VOP3POp_V_PK_MAXIMUM_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(0) # --- compiled pseudocode --- tmp[15 : 0].f16 = v_maximum_f16(S0[15 : 0].f16, S1[15 : 0].f16) tmp[31 : 16].f16 = v_maximum_f16(S0[31 : 16].f16, S1[31 : 16].f16) D0.b32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_FP8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].bf8); - # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].bf8); - # tmp += 32'F(S0[23 : 16].fp8) * 32'F(S1[23 : 16].bf8); - # tmp += 32'F(S0[31 : 24].fp8) * 32'F(S1[31 : 24].bf8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_FP8_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].fp8) * F(S1[7 : 0].bf8) tmp += F(S0[15 : 8].fp8) * F(S1[15 : 8].bf8) tmp += F(S0[23 : 16].fp8) * F(S1[23 : 16].bf8) tmp += F(S0[31 : 24].fp8) * F(S1[31 : 24].bf8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_BF8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].fp8); - # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].fp8); - # tmp += 32'F(S0[23 : 16].bf8) * 32'F(S1[23 : 16].fp8); - # tmp += 32'F(S0[31 : 24].bf8) * 32'F(S1[31 : 24].fp8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_BF8_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].bf8) * F(S1[7 : 0].fp8) tmp += F(S0[15 : 8].bf8) * F(S1[15 : 8].fp8) tmp += F(S0[23 : 16].bf8) * F(S1[23 : 16].fp8) tmp += F(S0[31 : 24].bf8) * F(S1[31 : 24].fp8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_FP8_FP8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].fp8) * 32'F(S1[7 : 0].fp8); - # tmp += 32'F(S0[15 : 8].fp8) * 32'F(S1[15 : 8].fp8); - # tmp += 32'F(S0[23 : 16].fp8) * 32'F(S1[23 : 16].fp8); - # tmp += 32'F(S0[31 : 24].fp8) * 32'F(S1[31 : 24].fp8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_FP8_FP8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].fp8) * F(S1[7 : 0].fp8) tmp += F(S0[15 : 8].fp8) * F(S1[15 : 8].fp8) tmp += F(S0[23 : 16].fp8) * F(S1[23 : 16].fp8) tmp += F(S0[31 : 24].fp8) * F(S1[31 : 24].fp8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} -def _VOP3POp_V_DOT4_F32_BF8_BF8(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # tmp = S2.f32; - # tmp += 32'F(S0[7 : 0].bf8) * 32'F(S1[7 : 0].bf8); - # tmp += 32'F(S0[15 : 8].bf8) * 32'F(S1[15 : 8].bf8); - # tmp += 32'F(S0[23 : 16].bf8) * 32'F(S1[23 : 16].bf8); - # 
tmp += 32'F(S0[31 : 24].bf8) * 32'F(S1[31 : 24].bf8); - # D0.f32 = tmp - S0 = Reg(s0) - S1 = Reg(s1) - S2 = Reg(s2) - D0 = Reg(d0) - tmp = Reg(0) - # --- compiled pseudocode --- +def _VOP3POp_V_DOT4_F32_BF8_BF8(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): tmp = Reg(S2.f32) tmp += F(S0[7 : 0].bf8) * F(S1[7 : 0].bf8) tmp += F(S0[15 : 8].bf8) * F(S1[15 : 8].bf8) tmp += F(S0[23 : 16].bf8) * F(S1[23 : 16].bf8) tmp += F(S0[31 : 24].bf8) * F(S1[31 : 24].bf8) D0.f32 = tmp - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - return result + return {'D0': D0} VOP3POp_FUNCTIONS = { VOP3POp.V_PK_MAD_I16: _VOP3POp_V_PK_MAD_I16, @@ -13744,1671 +5294,319 @@ VOP3POp_FUNCTIONS = { VOP3POp.V_DOT4_F32_BF8_BF8: _VOP3POp_V_DOT4_F32_BF8_BF8, } -def _VOPCOp_V_CMP_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 < S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f16 == S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <= S1.f16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.f16 > S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 <> S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f16 >= S1.f16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 < S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f32 == S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f32 > S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 <> S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f32 >= S1.f32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(64'F(S0.f32)) || isNAN(64'F(S1.f32))); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 < S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.f64 == S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.f64 > S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 <> S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 != S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.f64 >= S1.f64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is orderable to the second input. Store the result into VCC - # D0.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. 
- # D0.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)); - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # VCC or a scalar register. - # D0.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not less than the second input. Store the result into VCC - # D0.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 < S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i16 == S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 <= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i16 > S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i16 <> S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i16 >= S1.i16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 < S1.u16; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u16 == S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 <= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u16 > S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.u16 <> S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u16 >= S1.u16; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 < S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i32 == S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 <= S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 > S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.i32 <> S1.i32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i32 >= S1.i32; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 < S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u32 == S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 <= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.u32 > S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u32 <> S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u32 >= S1.u32; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 < S1.i64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.i64 == S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 <= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.i64 > S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. 
Store the result into VCC - # D0.u64[laneId] = S0.i64 <> S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.i64 >= S1.i64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is less than the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 < S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into VCC or a - # D0.u64[laneId] = S0.u64 == S1.u64; - # // D0 = VCC in VOPC encoding. 
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 <= S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is greater than the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 > S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is not equal to the second input. Store the result into VCC - # D0.u64[laneId] = S0.u64 <> S1.u64; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # D0.u64[laneId] = S0.u64 >= S1.u64; - # // D0 = VCC in VOPC encoding. 
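All of the regenerated V_CMP_* helpers above follow the same convention: operands arrive already wrapped in Reg objects, the body is just the ISA pseudocode line, and only the register the op actually wrote comes back, hence the uniform return {'D0': D0} (D0 being VCC in the VOPC encoding). A rough sketch of that shape, using a hypothetical _Reg64 and _v_cmp_lt_i64 rather than the real extra.assembly.amd.pcode.Reg or the generated function:

import ctypes

class _Reg64:
    # illustrative stand-in for pcode.Reg: a 64-bit value with a signed view and a per-lane bit view
    def __init__(self, val=0): self._val = val & 0xFFFFFFFFFFFFFFFF
    @property
    def i64(self): return ctypes.c_int64(self._val).value      # two's-complement signed view
    @property
    def u64(self): return self                                  # indexable by laneId like the pseudocode's D0.u64[laneId]
    def __getitem__(self, lane): return (self._val >> lane) & 1
    def __setitem__(self, lane, bit): self._val = (self._val & ~(1 << lane)) | ((1 if bit else 0) << lane)

def _v_cmp_lt_i64(S0, S1, D0, laneId):
    # same shape as the regenerated _VOPCOp_V_CMP_LT_I64: set one lane bit of D0, return only what changed
    D0.u64[laneId] = S0.i64 < S1.i64
    return {'D0': D0}

out = _v_cmp_lt_i64(_Reg64(0xFFFFFFFFFFFFFFFE), _Reg64(5), _Reg64(0), laneId=3)  # -2 < 5 on lane 3
assert out['D0']._val == 1 << 3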
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): D0.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # half-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -15422,54 +5620,9 @@ def _VOPCOp_V_CMP_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # single-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. 
- # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. - S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -15483,54 +5636,9 @@ def _VOPCOp_V_CMP_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # double-precision float, and set the per-lane condition code to the result. Store the result into VCC or a scalar - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # D0.u64[laneId] = result; - # // D0 = VCC in VOPC encoding. 
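The V_CMP_CLASS_* family is denser than the plain compares: S1 is a 10-bit class mask (bit meanings per the table above) and the lane result is the S1 bit selected by the floating-point class of S0. Roughly, in plain Python; cmp_class_f64 and the constants are illustrative only, and the quiet/signaling NaN distinction is collapsed here since a faithful signaling-NaN test needs the raw bit pattern:

import math

# class indices as listed in the S1.u[0..9] table above
(SNAN, QNAN, NEG_INF, NEG_NORM, NEG_DENORM,
 NEG_ZERO, POS_ZERO, POS_DENORM, POS_NORM, POS_INF) = range(10)

def cmp_class_f64(s0: float, s1_mask: int) -> bool:
    neg = math.copysign(1.0, s0) < 0.0
    if math.isnan(s0): idx = QNAN                     # signaling NaNs folded into QNAN in this sketch
    elif math.isinf(s0): idx = NEG_INF if neg else POS_INF
    elif s0 == 0.0: idx = NEG_ZERO if neg else POS_ZERO
    elif abs(s0) < 2.2250738585072014e-308: idx = NEG_DENORM if neg else POS_DENORM  # below DBL_MIN: denormal
    else: idx = NEG_NORM if neg else POS_NORM
    return bool((s1_mask >> idx) & 1)

assert cmp_class_f64(float('inf'), 1 << POS_INF)      # +inf matches the +inf class bit
assert not cmp_class_f64(-0.0, 1 << POS_ZERO)         # -0.0 is NEG_ZERO, not POS_ZERO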
- S0 = Reg(s0) - S1 = Reg(s1) - D0 = Reg(d0) - VCC = Reg(vcc) - laneId = lane - PC = Reg(pc) - # --- compiled pseudocode --- +def _VOPCOp_V_CMP_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -15544,1091 +5652,321 @@ def _VOPCOp_V_CMP_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] D0.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': D0._val, 'scc': scc & 1} - if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1 - result['vcc_lane'] = (D0._val >> lane) & 1 - result['d0_64'] = True - _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000 - result['new_pc'] = _pc - return result + return {'D0': D0} -def _VOPCOp_V_CMPX_LT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 < S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 < S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f16 == S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 == S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 <= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 > S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 > S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 <> S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F16(S0, S1, S2, D0, 
SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 != S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f16 >= S1.f16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f16 >= S1.f16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f16)) && !isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f16)) and not isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f16)) || isNAN(64'F(S1.f16))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f16)) or isNAN(F(S1.f16))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 >= S1.f16); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 >= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <> S1.f16); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 != S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 > S1.f16); - # // With NAN inputs 
this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 > S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 <= S1.f16); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 <= S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 == S1.f16); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 == S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f16 < S1.f16); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f16 < S1.f16) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 < S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 < S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.f32 == S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 == S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 <= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 > S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 > S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 <> S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 != S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f32 >= S1.f32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f32 >= S1.f32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(64'F(S0.f32)) && !isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(F(S0.f32)) and not isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(64'F(S0.f32)) || 
isNAN(64'F(S1.f32))) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(F(S0.f32)) or isNAN(F(S1.f32))) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 >= S1.f32); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 >= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <> S1.f32); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 != S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 > S1.f32); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 > S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 <= S1.f32); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 <= S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 == S1.f32); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 == S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 
'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f32 < S1.f32); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f32 < S1.f32) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 < S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 < S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. Store the result into the EXEC - # EXEC.u64[laneId] = S0.f64 == S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 == S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 <= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 > S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 > S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 <> S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 != 
S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.f64 >= S1.f64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.f64 >= S1.f64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_O_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (!isNAN(S0.f64) && !isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_O_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = ( not isNAN(S0.f64) and not isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_U_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = (isNAN(S0.f64) || isNAN(S1.f64)) - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_U_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = (isNAN(S0.f64) or isNAN(S1.f64)) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 >= S1.f64); - # // With NAN inputs this is not the same operation as < - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NGE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 >= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLG_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <> S1.f64); - # // With NAN inputs this is not the same operation as == - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLG_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 != S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NGT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 > S1.f64); - # // With NAN inputs this is not the same operation as <= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- 
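The V_CMPX_* variants in this stretch differ from V_CMP_* only in the destination: the per-lane result lands in the EXEC mask rather than D0/VCC, which is why these helpers now return {'EXEC': EXEC} and the old per-lane 'exec_lane' bookkeeping goes away. One way a caller might fold per-lane results back into a wave's execute mask (apply_cmpx, the lane_results dict, and the wave32 default are assumptions, not the emulator's actual loop):

def apply_cmpx(exec_mask: int, lane_results: dict[int, bool], wave_size: int = 32) -> int:
    # each lane for which the compare ran overwrites its own EXEC bit with the result
    for lane, hit in lane_results.items():
        exec_mask = (exec_mask & ~(1 << lane)) | (int(hit) << lane)
    return exec_mask & ((1 << wave_size) - 1)

# lanes 0 and 2 pass the compare, lane 1 fails: EXEC 0b111 -> 0b101
assert apply_cmpx(0b111, {0: True, 1: False, 2: True}) == 0b101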
+def _VOPCOp_V_CMPX_NGT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 > S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLE_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 <= S1.f64); - # // With NAN inputs this is not the same operation as > - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLE_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 <= S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NEQ_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 == S1.f64); - # // With NAN inputs this is not the same operation as != - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NEQ_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 == S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NLT_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = !(S0.f64 < S1.f64); - # // With NAN inputs this is not the same operation as >= - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NLT_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = not (S0.f64 < S1.f64) - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 < S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 < S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i16 == S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 == S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 <= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 > S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 > S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 <> S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 != S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i16 >= S1.i16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i16 >= S1.i16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 < S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 < S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u16 == S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 == S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 <= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 > S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 > S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 <> S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 != S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u16 >= S1.u16 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u16 >= S1.u16 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 < S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 < S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i32 == S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 == S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 <= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 > S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 > S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 <> S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 != S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i32 >= S1.i32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i32 >= S1.i32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 < S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 < S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u32 == S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 == S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 <= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 > S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 > S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 <> S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 != S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u32 >= S1.u32 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u32 >= S1.u32 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 < S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 < S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.i64 == S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 == S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 <= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 > S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 > S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 <> S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 != S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_I64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.i64 >= S1.i64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_I64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.i64 >= S1.i64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 < S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 < S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_EQ_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # Set the per-lane condition code to 1 iff the first input is equal to the second input. 
Store the result into the EXEC - # EXEC.u64[laneId] = S0.u64 == S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_EQ_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 == S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_LE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_LE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 <= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GT_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 > S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GT_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 > S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_NE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 <> S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_NE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 != S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_GE_U64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # EXEC.u64[laneId] = S0.u64 >= S1.u64 - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_GE_U64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): EXEC.u64[laneId] = S0.u64 >= S1.u64 - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f16)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f16)) then - # result = S1.u32[1] - # elsif exponent(S0.f16) == 31 then - # // +-INF - # result = S1.u32[sign(S0.f16) ? 
2 : 9] - # elsif exponent(S0.f16) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f16) ? 3 : 8] - # elsif 64'F(abs(S0.f16)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f16) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f16) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F16(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f16)): result = S1.u32[0] elif isQuietNAN(F(S0.f16)): @@ -16642,46 +5980,9 @@ def _VOPCOp_V_CMPX_CLASS_F16(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f16)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(64'F(S0.f32)) then - # result = S1.u32[0] - # elsif isQuietNAN(64'F(S0.f32)) then - # result = S1.u32[1] - # elsif exponent(S0.f32) == 255 then - # // +-INF - # result = S1.u32[sign(S0.f32) ? 2 : 9] - # elsif exponent(S0.f32) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f32) ? 3 : 8] - # elsif 64'F(abs(S0.f32)) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f32) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f32) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F32(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(F(S0.f32)): result = S1.u32[0] elif isQuietNAN(F(S0.f32)): @@ -16695,46 +5996,9 @@ def _VOPCOp_V_CMPX_CLASS_F32(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f32)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} -def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0): - # S1.u[0] value is a signaling NAN. - # S1.u[1] value is a quiet NAN. - # S1.u[2] value is negative infinity. - # S1.u[3] value is a negative normal value. - # S1.u[4] value is a negative denormal value. - # S1.u[5] value is negative zero. - # S1.u[6] value is positive zero. - # S1.u[7] value is a positive denormal value. - # S1.u[8] value is a positive normal value. - # S1.u[9] value is positive infinity. - # declare result : 1'U; - # if isSignalNAN(S0.f64) then - # result = S1.u32[0] - # elsif isQuietNAN(S0.f64) then - # result = S1.u32[1] - # elsif exponent(S0.f64) == 2047 then - # // +-INF - # result = S1.u32[sign(S0.f64) ? 2 : 9] - # elsif exponent(S0.f64) > 0 then - # // +-normal value - # result = S1.u32[sign(S0.f64) ? 
3 : 8] - # elsif abs(S0.f64) > 0.0 then - # // +-denormal value - # result = S1.u32[sign(S0.f64) ? 4 : 7] - # else - # // +-0.0 - # result = S1.u32[sign(S0.f64) ? 5 : 6] - # endif; - # EXEC.u64[laneId] = result - S0 = Reg(s0) - S1 = Reg(s1) - EXEC = Reg(exec_mask) - laneId = lane - # --- compiled pseudocode --- +def _VOPCOp_V_CMPX_CLASS_F64(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None): if isSignalNAN(S0.f64): result = S1.u32[0] elif isQuietNAN(S0.f64): @@ -16748,10 +6012,7 @@ def _VOPCOp_V_CMPX_CLASS_F64(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, else: result = S1.u32[((5) if (sign(S0.f64)) else (6))] EXEC.u64[laneId] = result - # --- end pseudocode --- - result = {'d0': d0, 'scc': scc & 1} - result['exec_lane'] = (EXEC._val >> lane) & 1 - return result + return {'EXEC': EXEC} VOPCOp_FUNCTIONS = { VOPCOp.V_CMP_LT_F16: _VOPCOp_V_CMP_LT_F16, diff --git a/extra/assembly/amd/emu.py b/extra/assembly/amd/emu.py index b24e904264..4e916604b4 100644 --- a/extra/assembly/amd/emu.py +++ b/extra/assembly/amd/emu.py @@ -1,8 +1,9 @@ # RDNA3 emulator - executes compiled pseudocode from AMD ISA PDF # mypy: ignore-errors from __future__ import annotations -import ctypes, struct -from extra.assembly.amd.dsl import Inst, RawImm, unwrap, FLOAT_ENC, MASK32, MASK64, _f32, _i32, _sext, _f16, _i16, _f64, _i64 +import ctypes +from extra.assembly.amd.dsl import Inst, unwrap, FLOAT_ENC, MASK32, MASK64, _f32, _i32, _sext, _f16, _i16, _f64, _i64 +from extra.assembly.amd.pcode import Reg from extra.assembly.amd.asm import detect_format from extra.assembly.amd.autogen.rdna3.gen_pcode import get_compiled_functions from extra.assembly.amd.autogen.rdna3.ins import (SOP1, SOP2, SOPC, SOPK, SOPP, SMEM, VOP1, VOP2, VOP3, VOP3SD, VOP3P, VOPC, DS, FLAT, VOPD, @@ -178,24 +179,21 @@ def exec_scalar(st: WaveState, inst: Inst) -> int: s0 = st.rsrc64(ssrc0, 0) if inst.is_src_64(0) else (st.rsrc(ssrc0, 0) if not isinstance(inst, (SOPK, SOPP)) else (st.rsgpr(inst.sdst) if isinstance(inst, SOPK) else 0)) s1 = st.rsrc64(inst.ssrc1, 0) if inst.is_src_64(1) else (st.rsrc(inst.ssrc1, 0) if isinstance(inst, (SOP2, SOPC)) else inst.simm16 if isinstance(inst, SOPK) else 0) d0 = st.rsgpr64(sdst) if inst.dst_regs() == 2 and sdst is not None else (st.rsgpr(sdst) if sdst is not None else 0) - exec_mask = st.exec_mask literal = inst.simm16 if isinstance(inst, (SOPK, SOPP)) else st.literal - # Execute compiled function - pass PC in bytes for instructions that need it - # For wave32, mask VCC and EXEC to 32 bits since only the lower 32 bits are relevant - pc_bytes = st.pc * 4 - vcc32, exec32 = st.vcc & MASK32, exec_mask & MASK32 - result = fn(s0, s1, 0, d0, st.scc, vcc32, 0, exec32, literal, None, {}, pc=pc_bytes) + # Create Reg objects for compiled function - mask VCC/EXEC to 32 bits for wave32 + result = fn(Reg(s0), Reg(s1), None, Reg(d0), Reg(st.scc), Reg(st.vcc & MASK32), 0, Reg(st.exec_mask & MASK32), literal, None, PC=Reg(st.pc * 4)) - # Apply results - if sdst is not None: - (st.wsgpr64 if result.get('d0_64') else st.wsgpr)(sdst, result['d0']) - if 'scc' in result: st.scc = result['scc'] - if 'exec' in result: st.exec_mask = result['exec'] - if 'new_pc' in result: + # Apply results - extract values from returned Reg objects + if sdst is not None and 'D0' in result: + (st.wsgpr64 if inst.dst_regs() == 2 else st.wsgpr)(sdst, result['D0']._val) + if 'SCC' in result: st.scc = result['SCC']._val & 1 + if 'EXEC' in result: st.exec_mask = result['EXEC']._val + if 'PC' in result: # Convert absolute 
byte address to word delta - # new_pc is where we want to go, st.pc is current position, inst._words will be added after - new_pc_words = result['new_pc'] // 4 + pc_val = result['PC']._val + new_pc = pc_val if pc_val < 0x8000000000000000 else pc_val - 0x10000000000000000 + new_pc_words = new_pc // 4 return new_pc_words - st.pc - 1 # -1 because emulator adds inst_words (1 for scalar) return 0 @@ -260,24 +258,25 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No vdsty = (inst.vdsty << 1) | ((inst.vdstx & 1) ^ 1) inputs = [(inst.opx, st.rsrc(inst.srcx0, lane), V[inst.vsrcx1], V[inst.vdstx], inst.vdstx), (inst.opy, st.rsrc(inst.srcy0, lane), V[inst.vsrcy1], V[vdsty], vdsty)] - results = [(dst, fn(s0, s1, 0, d0, st.scc, st.vcc, lane, st.exec_mask, st.literal, None, {})['d0']) - for vopd_op, s0, s1, d0, dst in inputs if (op := _VOPD_TO_VOP.get(vopd_op)) and (fn := compiled.get(type(op), {}).get(op))] - for dst, val in results: V[dst] = val + def exec_vopd(vopd_op, s0, s1, d0): + op = _VOPD_TO_VOP[vopd_op] + return compiled[type(op)][op](Reg(s0), Reg(s1), None, Reg(d0), Reg(st.scc), Reg(st.vcc), lane, Reg(st.exec_mask), st.literal, None)['D0']._val + for vopd_op, s0, s1, d0, dst in inputs: V[dst] = exec_vopd(vopd_op, s0, s1, d0) return # VOP3SD: has extra scalar dest for carry output if isinstance(inst, VOP3SD): - fn = compiled.get(VOP3SDOp, {}).get(inst.op) - if fn is None: raise NotImplementedError(f"{inst.op.name} not in pseudocode") + fn = compiled[VOP3SDOp][inst.op] # Read sources based on register counts from inst properties def rsrc_n(src, regs): return st.rsrc64(src, lane) if regs == 2 else st.rsrc(src, lane) s0, s1, s2 = rsrc_n(inst.src0, inst.src_regs(0)), rsrc_n(inst.src1, inst.src_regs(1)), rsrc_n(inst.src2, inst.src_regs(2)) # Carry-in ops use src2 as carry bitmask instead of VCC vcc = st.rsgpr64(inst.src2) if 'CO_CI' in inst.op_name else st.vcc - result = fn(s0, s1, s2, V[inst.vdst], st.scc, vcc, lane, st.exec_mask, st.literal, None, {}) - V[inst.vdst] = result['d0'] & MASK32 - if result.get('d0_64'): V[inst.vdst + 1] = (result['d0'] >> 32) & MASK32 - if result.get('vcc_lane') is not None: st.pend_sgpr_lane(inst.sdst, lane, result['vcc_lane']) + result = fn(Reg(s0), Reg(s1), Reg(s2), Reg(V[inst.vdst]), Reg(st.scc), Reg(vcc), lane, Reg(st.exec_mask), st.literal, None) + d0_val = result['D0']._val + V[inst.vdst] = d0_val & MASK32 + if inst.dst_regs() == 2: V[inst.vdst + 1] = (d0_val >> 32) & MASK32 + if 'VCC' in result: st.pend_sgpr_lane(inst.sdst, lane, (result['VCC']._val >> lane) & 1) return # Get op enum and sources (None means "no source" for that operand) @@ -317,8 +316,7 @@ def exec_vector(st: WaveState, inst: Inst, lane: int, lds: bytearray | None = No if abs_ & (1<= 256 else (src0 if src0 is not None else 0) - result = fn(s0, s1, s2, d0, st.scc, vcc_for_fn, lane, st.exec_mask, st.literal, st.vgpr, {}, src0_idx, vdst) + result = fn(Reg(s0), Reg(s1), Reg(s2), Reg(d0), Reg(st.scc), Reg(vcc_for_fn), lane, Reg(st.exec_mask), st.literal, st.vgpr, src0_idx, vdst) - # Apply results + # Apply results - extract values from returned Reg objects if 'vgpr_write' in result: # Lane instruction wrote to VGPR: (lane, vgpr_idx, value) wr_lane, wr_idx, wr_val = result['vgpr_write'] st.vgpr[wr_lane][wr_idx] = wr_val - if 'vcc_lane' in result: + if 'VCC' in result: # VOP2 carry ops write to VCC implicitly; VOPC/VOP3 write to vdst - st.pend_sgpr_lane(VCC_LO if isinstance(inst, VOP2) and 'CO_CI' in inst.op_name else vdst, lane, result['vcc_lane']) - if 'exec_lane' 
in result: - # V_CMPX instructions write to EXEC per-lane - st.pend_sgpr_lane(EXEC_LO, lane, result['exec_lane']) - if 'd0' in result and op_cls is not VOPCOp and 'vgpr_write' not in result: + st.pend_sgpr_lane(VCC_LO if isinstance(inst, VOP2) and 'CO_CI' in inst.op_name else vdst, lane, (result['VCC']._val >> lane) & 1) + if 'EXEC' in result: + # V_CMPX instructions write to EXEC per-lane (not to vdst) + st.pend_sgpr_lane(EXEC_LO, lane, (result['EXEC']._val >> lane) & 1) + elif op_cls is VOPCOp: + # VOPC comparison result stored in D0 bitmask, extract lane bit (non-CMPX only) + st.pend_sgpr_lane(vdst, lane, (result['D0']._val >> lane) & 1) + if op_cls is not VOPCOp and 'vgpr_write' not in result: writes_to_sgpr = 'READFIRSTLANE' in inst.op_name or 'READLANE' in inst.op_name - d0_val = result['d0'] + d0_val = result['D0']._val if writes_to_sgpr: st.wsgpr(vdst, d0_val & MASK32) - elif result.get('d0_64'): V[vdst], V[vdst + 1] = d0_val & MASK32, (d0_val >> 32) & MASK32 + elif inst.dst_regs() == 2: V[vdst], V[vdst + 1] = d0_val & MASK32, (d0_val >> 32) & MASK32 elif inst.is_dst_16(): V[vdst] = _dst16(V[vdst], d0_val, bool(opsel & 8) if isinstance(inst, VOP3) else dst_hi) else: V[vdst] = d0_val & MASK32 diff --git a/extra/assembly/amd/pdf.py b/extra/assembly/amd/pdf.py index a7a91a2166..abd35022cd 100644 --- a/extra/assembly/amd/pdf.py +++ b/extra/assembly/amd/pdf.py @@ -43,7 +43,10 @@ UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS', 'vscnt', 'vmcnt', 'expcnt', 'lgkmcnt', 'CVT_OFF_TABLE', 'ThreadMask', 'S1[i', 'C.i32', 'S[i]', 'in[', - 'if n.', 'DST.u32', 'addrd = DST', 'addr = DST'] # Malformed pseudocode from PDF + 'if n.', 'DST.u32', 'addrd = DST', 'addr = DST', + 'BARRIER_STATE', 'ReallocVgprs', + 'GPR_IDX', 'VSKIP', 'specified in', 'TTBL', + 'fp6', 'bf6'] # Malformed pseudocode from PDF # ═══════════════════════════════════════════════════════════════════════════════ # COMPILER: pseudocode -> Python (minimal transforms) @@ -51,6 +54,7 @@ UNSUPPORTED = ['SGPR[', 'V_SWAP', 'eval ', 'FATAL_HALT', 'HW_REGISTERS', def compile_pseudocode(pseudocode: str) -> str: """Compile pseudocode to Python. 
Transforms are minimal - most syntax just works.""" + pseudocode = re.sub(r'\bpass\b', 'pass_', pseudocode) # 'pass' is Python keyword raw_lines = pseudocode.strip().split('\n') joined_lines: list[str] = [] for line in raw_lines: @@ -113,7 +117,7 @@ def compile_pseudocode(pseudocode: str) -> str: break else: lhs, rhs = line.split('=', 1) - lhs_s, rhs_s = lhs.strip(), rhs.strip() + lhs_s, rhs_s = _expr(lhs.strip()), rhs.strip() stmt = _assign(lhs_s, _expr(rhs_s)) if in_first_match_loop and rhs_s == 'i' and (lhs_s == 'tmp' or lhs_s == 'D0.i32'): stmt += "; break" @@ -533,52 +537,57 @@ def _apply_pseudocode_fixes(op, code: str) -> str: def _generate_function(cls_name: str, op, pc: str, code: str) -> tuple[str, str]: """Generate a single compiled pseudocode function.""" - is_64 = any(p in pc for p in ['D0.u64', 'D0.b64', 'D0.f64', 'D0.i64', 'D1.u64', 'D1.b64', 'D1.f64', 'D1.i64']) has_d1 = '{ D1' in pc - if has_d1: is_64 = True - is_cmp = (cls_name in ('VOPCOp', 'VOP3Op')) and 'D0.u64[laneId]' in pc is_cmpx = (cls_name in ('VOPCOp', 'VOP3Op')) and 'EXEC.u64[laneId]' in pc is_div_scale = 'DIV_SCALE' in op.name has_sdst = cls_name == 'VOP3SDOp' and ('VCC.u64[laneId]' in pc or is_div_scale) - has_pc = 'PC' in pc combined = code + pc fn_name = f"_{cls_name}_{op.name}" - lines = [f"def {fn_name}(s0, s1, s2, d0, scc, vcc, lane, exec_mask, literal, VGPR, _vars, src0_idx=0, vdst_idx=0, pc=0):"] - for pc_line in pc.split('\n'): lines.append(f" # {pc_line}") + # Function accepts Reg objects directly (uppercase names), laneId is passed directly as int + lines = [f"def {fn_name}(S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR, src0_idx=0, vdst_idx=0, PC=None):"] - regs = [('S0', 'Reg(s0)'), ('S1', 'Reg(s1)'), ('S2', 'Reg(s2)'), - ('D0', 'Reg(s0)' if is_div_scale else 'Reg(d0)'), ('D1', 'Reg(0)'), - ('SCC', 'Reg(scc)'), ('VCC', 'Reg(vcc)'), ('EXEC', 'Reg(exec_mask)'), - ('tmp', 'Reg(0)'), ('saveexec', 'Reg(exec_mask)'), ('laneId', 'lane'), - ('SIMM16', 'Reg(literal)'), ('SIMM32', 'Reg(literal)'), - ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)'), ('PC', 'Reg(pc)')] - used = {name for name, _ in regs if name in combined} - if 'EXEC_LO' in combined or 'EXEC_HI' in combined: used.add('EXEC') - if 'VCCZ' in combined: used.add('VCC') - if 'EXECZ' in combined: used.add('EXEC') - for name, init in regs: - if name in used: lines.append(f" {name} = {init}") - if 'EXEC_LO' in combined: lines.append(" EXEC_LO = SliceProxy(EXEC, 31, 0)") - if 'EXEC_HI' in combined: lines.append(" EXEC_HI = SliceProxy(EXEC, 63, 32)") - if 'VCCZ' in combined: lines.append(" VCCZ = Reg(1 if VCC._val == 0 else 0)") - if 'EXECZ' in combined: lines.append(" EXECZ = Reg(1 if EXEC._val == 0 else 0)") - lines.append(" # --- compiled pseudocode ---") - for line in code.split('\n'): lines.append(f" {line}") - lines.append(" # --- end pseudocode ---") - d0_val, scc_val = ("D0._val" if 'D0' in used else "d0"), ("SCC._val & 1" if 'SCC' in used else "scc & 1") - lines.append(f" result = {{'d0': {d0_val}, 'scc': {scc_val}}}") - if has_sdst: lines.append(" result['vcc_lane'] = (VCC._val >> lane) & 1") - elif 'VCC' in used: lines.append(" if VCC._val != vcc: result['vcc_lane'] = (VCC._val >> lane) & 1") - if is_cmpx: lines.append(" result['exec_lane'] = (EXEC._val >> lane) & 1") - elif 'EXEC' in used: lines.append(" if EXEC._val != exec_mask: result['exec'] = EXEC._val") - if is_cmp: lines.append(" result['vcc_lane'] = (D0._val >> lane) & 1") - if is_64: lines.append(" result['d0_64'] = True") - if has_d1: lines.append(" result['d1'] = 
D1._val & 1") - if has_pc: - lines.append(" _pc = PC._val if PC._val < 0x8000000000000000 else PC._val - 0x10000000000000000") - lines.append(" result['new_pc'] = _pc") - lines.append(" return result\n") + # Registers that need special handling (not passed directly) + # Only init if used but not first assigned as `name = Reg(...)` in the compiled code + def needs_init(name): return name in combined and not re.search(rf'^\s*{name}\s*=\s*Reg\(', code, re.MULTILINE) + special_regs = [('D1', 'Reg(0)'), ('SIMM16', 'Reg(literal)'), ('SIMM32', 'Reg(literal)'), + ('SRC0', 'Reg(src0_idx)'), ('VDST', 'Reg(vdst_idx)')] + if needs_init('tmp'): special_regs.insert(0, ('tmp', 'Reg(0)')) + if needs_init('saveexec'): special_regs.insert(0, ('saveexec', 'Reg(EXEC._val)')) + used = {name for name, _ in special_regs if name in combined} + + # Detect which registers are modified (not just read) - look for assignments + modifies_d0 = is_div_scale or bool(re.search(r'\bD0\b[.\[]', combined)) + modifies_exec = is_cmpx or bool(re.search(r'EXEC\.(u32|u64|b32|b64)\s*=', combined)) + modifies_vcc = has_sdst or bool(re.search(r'VCC\.(u32|u64|b32|b64)\s*=|VCC\.u64\[laneId\]\s*=', combined)) + modifies_scc = bool(re.search(r'\bSCC\s*=', combined)) + modifies_pc = bool(re.search(r'\bPC\s*=', combined)) + + # Build init code for special registers + init_lines = [] + if is_div_scale: init_lines.append(" D0 = Reg(S0._val)") + for name, init in special_regs: + if name in used: init_lines.append(f" {name} = {init}") + if 'EXEC_LO' in code: init_lines.append(" EXEC_LO = SliceProxy(EXEC, 31, 0)") + if 'EXEC_HI' in code: init_lines.append(" EXEC_HI = SliceProxy(EXEC, 63, 32)") + if 'VCCZ' in code and not re.search(r'^\s*VCCZ\s*=', code, re.MULTILINE): init_lines.append(" VCCZ = Reg(1 if VCC._val == 0 else 0)") + if 'EXECZ' in code and not re.search(r'^\s*EXECZ\s*=', code, re.MULTILINE): init_lines.append(" EXECZ = Reg(1 if EXEC._val == 0 else 0)") + code_lines = [line for line in code.split('\n') if line.strip()] + if init_lines: + lines.extend(init_lines) + if code_lines: lines.append(" # --- compiled pseudocode ---") + for line in code_lines: + lines.append(f" {line}") + + # Build result dict - only include registers that are modified + result_items = [] + if modifies_d0: result_items.append("'D0': D0") + if modifies_scc: result_items.append("'SCC': SCC") + if modifies_vcc: result_items.append("'VCC': VCC") + if modifies_exec: result_items.append("'EXEC': EXEC") + if has_d1: result_items.append("'D1': D1") + if modifies_pc: result_items.append("'PC': PC") + lines.append(f" return {{{', '.join(result_items)}}}\n") return fn_name, '\n'.join(lines) # ═══════════════════════════════════════════════════════════════════════════════ diff --git a/extra/assembly/amd/test/test_pcode.py b/extra/assembly/amd/test/test_pcode.py index 9fdc630c00..be1274a863 100644 --- a/extra/assembly/amd/test/test_pcode.py +++ b/extra/assembly/amd/test/test_pcode.py @@ -229,17 +229,18 @@ class TestPseudocodeRegressions(unittest.TestCase): """Regression tests for pseudocode instruction emulation bugs.""" def test_v_div_scale_f32_vcc_always_returned(self): - """V_DIV_SCALE_F32 must always return vcc_lane, even when VCC=0 (no scaling needed). - Bug: when VCC._val == vcc (both 0), vcc_lane wasn't returned, so VCC bits weren't written. + """V_DIV_SCALE_F32 must always return VCC, even when VCC=0 (no scaling needed). + Bug: when VCC._val == vcc (both 0), VCC wasn't returned, so VCC bits weren't written. 
This caused division to produce wrong results for multiple lanes.""" # Normal case: 1.0 / 3.0, no scaling needed, VCC should be 0 - s0 = 0x3f800000 # 1.0 - s1 = 0x40400000 # 3.0 - s2 = 0x3f800000 # 1.0 (numerator) - result = _VOP3SDOp_V_DIV_SCALE_F32(s0, s1, s2, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - # Must always have vcc_lane in result - self.assertIn('vcc_lane', result, "V_DIV_SCALE_F32 must always return vcc_lane") - self.assertEqual(result['vcc_lane'], 0, "vcc_lane should be 0 when no scaling needed") + S0 = Reg(0x3f800000) # 1.0 + S1 = Reg(0x40400000) # 3.0 + S2 = Reg(0x3f800000) # 1.0 (numerator) + D0, SCC, VCC, EXEC = Reg(0), Reg(0), Reg(0), Reg(0xffffffff) + result = _VOP3SDOp_V_DIV_SCALE_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + # Must always have VCC in result + self.assertIn('VCC', result, "V_DIV_SCALE_F32 must always return VCC") + self.assertEqual(result['VCC']._val & 1, 0, "VCC lane 0 should be 0 when no scaling needed") def test_v_cmp_class_f32_detects_quiet_nan(self): """V_CMP_CLASS_F32 must correctly identify quiet NaN vs signaling NaN. @@ -248,18 +249,22 @@ class TestPseudocodeRegressions(unittest.TestCase): signal_nan = 0x7f800001 # signaling NaN: exponent=255, bit22=0 # Test quiet NaN detection (bit 1 in mask) s1_quiet = 0b0000000010 # bit 1 = quiet NaN - result = _VOPCOp_V_CMP_CLASS_F32(quiet_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 1, "Should detect quiet NaN with quiet NaN mask") + S0, S1, S2, D0, SCC, VCC, EXEC = Reg(quiet_nan), Reg(s1_quiet), Reg(0), Reg(0), Reg(0), Reg(0), Reg(0xffffffff) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 1, "Should detect quiet NaN with quiet NaN mask") # Test signaling NaN detection (bit 0 in mask) s1_signal = 0b0000000001 # bit 0 = signaling NaN - result = _VOPCOp_V_CMP_CLASS_F32(signal_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 1, "Should detect signaling NaN with signaling NaN mask") + S0, S1 = Reg(signal_nan), Reg(s1_signal) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 1, "Should detect signaling NaN with signaling NaN mask") # Test that quiet NaN doesn't match signaling NaN mask - result = _VOPCOp_V_CMP_CLASS_F32(quiet_nan, s1_signal, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 0, "Quiet NaN should not match signaling NaN mask") + S0, S1 = Reg(quiet_nan), Reg(s1_signal) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 0, "Quiet NaN should not match signaling NaN mask") # Test that signaling NaN doesn't match quiet NaN mask - result = _VOPCOp_V_CMP_CLASS_F32(signal_nan, s1_quiet, 0, 0, 0, 0, 0, 0xffffffff, 0, None, {}) - self.assertEqual(result['vcc_lane'], 0, "Signaling NaN should not match quiet NaN mask") + S0, S1 = Reg(signal_nan), Reg(s1_quiet) + result = _VOPCOp_V_CMP_CLASS_F32(S0, S1, S2, D0, SCC, VCC, 0, EXEC, 0, None) + self.assertEqual(result['D0']._val & 1, 0, "Signaling NaN should not match quiet NaN mask") def test_isnan_with_typed_view(self): """_isnan must work with TypedView objects, not just Python floats. 
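
The patch above changes every compiled pseudocode function to take and return Reg wrappers keyed by architectural register name instead of plain-int dicts. A minimal illustrative sketch of calling one of the generated V_CMPX helpers directly, assuming the functions are importable from the same autogen gen_pcode module that emu.py imports from, and using made-up input values:

from extra.assembly.amd.pcode import Reg
from extra.assembly.amd.autogen.rdna3.gen_pcode import _VOPCOp_V_CMPX_EQ_I64  # assumed import path

# two equal 64-bit sources, so the compare should set this lane's EXEC bit
S0, S1 = Reg(7), Reg(7)
D0, SCC, VCC, EXEC = Reg(0), Reg(0), Reg(0), Reg(0xffffffff)
# positional args mirror the generated signature: S0, S1, S2, D0, SCC, VCC, laneId, EXEC, literal, VGPR
result = _VOPCOp_V_CMPX_EQ_I64(S0, S1, None, D0, SCC, VCC, 0, EXEC, 0, None)
# V_CMPX_* now returns only the register it modified (EXEC), as a Reg
lane0 = (result['EXEC']._val >> 0) & 1  # expected 1, since S0.i64 == S1.i64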
From 8bf7c9c1d25c51787c7af6401065b0e7be115309 Mon Sep 17 00:00:00 2001
From: chenyu
Date: Wed, 31 Dec 2025 17:28:39 -0500
Subject: [PATCH 09/25] no-op cleanups for ptx [pr] (#13938)

---
 tinygrad/renderer/ptx.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tinygrad/renderer/ptx.py b/tinygrad/renderer/ptx.py
index 953c19de9c..c21f857d40 100644
--- a/tinygrad/renderer/ptx.py
+++ b/tinygrad/renderer/ptx.py
@@ -133,7 +133,7 @@ string_rewrite = PatternMatcher([
   (UPat(Ops.IF, name="x"), lambda ctx, x: f"@!{ctx.r[x.src[0]]} bra IF_{ctx.r[x.src[0]][1:]}_{ctx.uops.index(x)};"),
   (UPat(Ops.ENDIF, name="x"), lambda ctx, x: f"IF_{ctx.r[x.src[0].src[0]][1:]}_{ctx.uops.index(x.src[0])}:"),
   (UPat(Ops.WMMA, name="x"), lambda ctx, x: list(render_wmma(ctx, x))),
-  (UPat(Ops.BARRIER, name="x"), lambda ctx, x: ctx.barrier),
+  (UPat(Ops.BARRIER), lambda ctx: ctx.barrier),
   (UPat(Ops.DEFINE_VAR, name="x"), lambda ctx, x: f"ld.param.{ctx.mem_types[x.dtype]} {ctx.r[x]}, [{x.arg[0]}+0];"),
 ])

@@ -180,7 +180,7 @@ class PTXRenderer(Renderer):
     self.uops = uops

     def ssa(prefix:str, u:UOp|None=None, dtype:str|None=None) -> str:
-      nonlocal c, r
+      nonlocal c
       prefix += f"_{dtype if dtype is not None else self.types[unwrap(u).dtype.base]}_"
       c[prefix] += 1
       return f"%{prefix}{c[prefix]-1}"
@@ -230,7 +230,7 @@ class PTXRenderer(Renderer):
           [ssa("wmma_acc", dtype="b32") for _ in range(0, len(r[u.src[2]]), 4 // u.dtype.scalar().itemsize)]]
         r[u] = [ssa("wmma", dtype=self.types[u.dtype.scalar()]) for _ in range(u.dtype.count)]
       prefix, dtype = {Ops.CAST: ("cast", None), Ops.BITCAST: ("cast", None), Ops.END: ("pred", "pred"), Ops.RANGE: ("ridx", None),
-                       Ops.DEFINE_VAR: ("dat", None), Ops.CONST: ("const", None), Ops.DEFINE_LOCAL: ("local",self.types[dtypes.ulong]),
+                       Ops.DEFINE_VAR: ("dat", None), Ops.CONST: ("const", None), Ops.DEFINE_LOCAL: ("local", self.types[dtypes.ulong]),
                        Ops.DEFINE_GLOBAL: ("dat", self.types[dtypes.ulong]), **{op: ("alu", None) for op in GroupOp.ALU}}.get(u.op, (None, None))
       if prefix: r[u] = ssa(prefix, u, dtype)

From e2987001ee8129819269440608a407d357acff3b Mon Sep 17 00:00:00 2001
From: chenyu
Date: Wed, 31 Dec 2025 17:51:51 -0500
Subject: [PATCH 10/25] unify pre-commit mypy and ci mypy (#13940)

---
 .github/workflows/test.yml | 5 ++---
 .pre-commit-config.yaml    | 2 +-
 pyproject.toml             | 5 +++++
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7fe8d47100..8c5f411aba 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -218,7 +218,6 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 10

-    # TODO: run the pre-commit hook to replace a lot of this
     steps:
     - name: Checkout Code
       uses: actions/checkout@v4
@@ -233,13 +232,13 @@ jobs:
     - name: Lint with ruff
       run: |
         pip3 install --upgrade --force-reinstall ruff==0.14.10
-        python3 -m ruff check .
+        pre-commit run ruff --all-files
         python3 -m ruff check examples/mlperf/ --ignore E501
         python3 -m ruff check extra/thunder/tiny/ --ignore E501 --ignore F841 --ignore E722
         python3 -m ruff check extra/torch_backend/backend.py
     - name: Run mypy
       run: |
-        python -m mypy --strict-equality --lineprecision-report .
+        python -m mypy --lineprecision-report .
         cat lineprecision.txt
     - name: Run TYPED=1
       run: TYPED=1 python -c "import tinygrad"
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c9be0c75b1..bf364a256e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
       pass_filenames: false
     - id: mypy
       name: mypy
-      entry: python3 -m mypy tinygrad/ --strict-equality
+      entry: python3 -m mypy
      language: system
      always_run: true
      pass_filenames: false
diff --git a/pyproject.toml b/pyproject.toml
index c3ced6a32b..ed2f48639b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -135,9 +135,14 @@ check_untyped_defs = true
 explicit_package_bases = true
 warn_unreachable = true
 warn_redundant_casts = true
+strict_equality = true
 # NOTE: had to comment this out to make mypy pass on both CI and OSX
 #warn_unused_ignores = true

+[[tool.mypy.overrides]]
+module = "extra.*"
+follow_imports = "skip"
+
 [tool.pytest.ini_options]
 norecursedirs = [
   "extra",

From 0ed58c1fcd345626aa670f9a85b8e5ea9fa98b13 Mon Sep 17 00:00:00 2001
From: chenyu
Date: Wed, 31 Dec 2025 18:29:16 -0500
Subject: [PATCH 11/25] clean up some functions in helpers [pr] (#13942)

---
 tinygrad/helpers.py | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py
index aca12c4d3b..f5660f8f3f 100644
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -38,15 +38,11 @@ def ansilen(s:str): return len(ansistrip(s))
 def make_tuple(x:int|Sequence[int], cnt:int) -> tuple[int, ...]: return (x,)*cnt if isinstance(x, int) else tuple(x)
 def flatten(l:Iterable[Iterable[T]]): return [item for sublist in l for item in sublist]
 def fully_flatten(l):
-  if hasattr(l, "__len__") and hasattr(l, "__getitem__") and not isinstance(l, str):
-    if hasattr(l, "shape") and l.shape == (): return [l[()]]
-    flattened = []
-    for li in l: flattened.extend(fully_flatten(li))
-    return flattened
-  return [l]
+  if not (hasattr(l, "__len__") and hasattr(l, "__getitem__")) or isinstance(l, str): return [l]
+  return [l[()]] if hasattr(l, "shape") and l.shape == () else [x for li in l for x in fully_flatten(li)]
 def fromimport(mod, frm): return getattr(__import__(mod, fromlist=[frm]), frm)
 def _is_balanced(s:str) -> bool: return (d := 0, all((d := d + (c == '(') - (c == ')')) >= 0 for c in s))[1] and d == 0
-def strip_parens(fst:str) -> str: return fst[1:-1] if fst and fst[0]=='(' and fst[-1] == ')' and _is_balanced(fst[1:-1]) else fst
+def strip_parens(fst:str) -> str: return fst[1:-1] if fst[:1]=='(' and fst[-1:]==')' and _is_balanced(fst[1:-1]) else fst
 def ceildiv(num, amt): return int(ret) if isinstance((ret:=-(num//-amt)), float) else ret
 def round_up(num:int, amt:int) -> int: return (num+amt-1)//amt * amt
 def round_down(num:int, amt:int) -> int: return -round_up(-num, amt)
 def next_power2(x): return 1 if x == 0 else 1 << (x - 1).bit_length()
@@ -88,9 +84,7 @@ def word_wrap(x, wrap=80):
   while len(ansistrip(x[:i])) < wrap and i < len(x): i += 1
   return x[:i] + "\n" + word_wrap(x[i:], wrap)
 def pad_bytes(b:bytes, align:int) -> bytes: return b + b'\x00' * ((align - (len(b) % align)) % align)
-def panic(e:Exception|None=None):
-  if e is None: raise RuntimeError("PANIC!")
-  raise e
+def panic(e:Exception|None=None): raise e if e is not None else RuntimeError("PANIC!")

 @functools.cache
 def canonicalize_strides(shape:tuple[T, ...], strides:tuple[T, ...]) -> tuple[T, ...]:
@@ -150,9 +144,7 @@ def getenv(key:str, default:Any=0): return type(default)(os.getenv(key, default)
 def temp(x:str, append_user:bool=False) -> str: return (pathlib.Path(tempfile.gettempdir()) / (f"{x}.{getpass.getuser()}"
if append_user else x)).as_posix() -def stderr_log(msg): - sys.stderr.write(msg) - sys.stderr.flush() +def stderr_log(msg:str): print(msg, end='', file=sys.stderr, flush=True) class Context(contextlib.ContextDecorator): def __init__(self, **kwargs): self.kwargs = kwargs From 20777f30b9f0eb1ebed0c6967c11c533bc5dc6c1 Mon Sep 17 00:00:00 2001 From: haofei Date: Wed, 31 Dec 2025 20:40:09 -0800 Subject: [PATCH 12/25] Fix QR/SVD NaNs on zero/orthogonal inputs (#13943) --- test/unit/test_linalg.py | 18 ++++++++++++++++++ tinygrad/tensor.py | 17 +++++++++++------ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/test/unit/test_linalg.py b/test/unit/test_linalg.py index beb8800ff2..d9c5f52507 100644 --- a/test/unit/test_linalg.py +++ b/test/unit/test_linalg.py @@ -65,6 +65,24 @@ class TestLinAlg(unittest.TestCase): orthogonality_helper(Q) reconstruction_helper([Q,R],a) + def test_qr_zero_column(self): + a = Tensor([[0.0, 1.0], [0.0, 2.0]]).realize() + Q,R = a.qr() + assert not np.isnan(Q.numpy()).any() + assert not np.isnan(R.numpy()).any() + orthogonality_helper(Q) + reconstruction_helper([Q,R], a) + + def test_svd_identity(self): + for a in (Tensor.eye(2), Tensor.zeros(2, 2)): + a = a.realize() + U,S,V = a.svd() + assert not np.isnan(U.numpy()).any() + assert not np.isnan(S.numpy()).any() + assert not np.isnan(V.numpy()).any() + s_diag = (S.unsqueeze(-2) * Tensor.eye(2)) + reconstruction_helper([U, s_diag, V], a) + def test_newton_schulz(self): coefficients = [(2, -1.5, 0.5), (2.0, -1.4, 0.2, 0.2)]#these params map to the sign function sizes = [(2,2), (3,2), (2,3), (2,2,2)] diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index d0c531bb95..736e6868de 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -3637,11 +3637,13 @@ class Tensor(OpMixin): Q = Tensor.eye(m, dtype=self.dtype).reshape((1,) * len(b_shape) + (m, m)).expand(b_shape + (m, m)).contiguous() for i in range(min(m, n)): x = R[..., i:m, i].contiguous() # TODO: without contigous this can silently be wrong, should at least assert - s = -x[..., 0].sign() - u1 = x[..., 0] - s * x.square().sum(-1).sqrt() - w = x.unsqueeze(-1) / u1.reshape(b_shape + (1, 1)) + norm = x.square().sum(-1).sqrt() + s = (x[..., 0] != 0).where(-x[..., 0].sign(), -1) + u1 = x[..., 0] - s * norm + w = x.unsqueeze(-1) / (norm != 0).where(u1, 1).reshape(b_shape + (1, 1)) w[..., 0, 0] = 1 - tau = (-s * u1 / x.square().sum(-1).sqrt()).reshape(b_shape + (1, 1)) + tau = (-s * u1 / (norm != 0).where(norm, 1)).reshape(b_shape + (1, 1)) + tau = (norm != 0).reshape(b_shape + (1, 1)).where(tau, 0) R[..., i:m, :] = R[..., i:m, :] - (w * tau) @ (w.transpose(-2, -1) @ R[..., i:m, :]) Q[..., :, i:m] = Q[..., :, i:m] - (Q[..., :, i:m] @ w) @ (tau * w).transpose(-2, -1) return Q,R @@ -3668,8 +3670,10 @@ class Tensor(OpMixin): #compute the jacobi rotations for each pairing gamma = (U_left * U_right).sum(-2).reshape(b_shape + (1, num//2)) alpha, beta = U_permuted.square().sum(-2).unsqueeze(-2).split(num//2, -1) - tau = (beta - alpha) / (2 * gamma) + rot = gamma != 0 + tau = (beta - alpha) / (2 * rot.where(gamma, 1)) t = tau.sign() / (tau.abs() + (1 + tau.square()).sqrt()) + t = rot.where(t, 0) c = 1 / (1 + t.square()).sqrt() s = c * t #apply the rotations @@ -3688,7 +3692,8 @@ class Tensor(OpMixin): S, indices = U.square().sum(-2).sqrt().sort(dim = -1, descending=True) new_indices = Tensor.arange(num).reshape((1,) * (self.ndim - 1) + (num,)).expand(b_shape + (num, num)).contiguous() new_indices[..., :num] = indices.reshape(b_shape + (1, num)).expand(b_shape + 
(num, num)) - U, V = U.gather(-1, new_indices[...,0:num,0:num]) / S.unsqueeze(-2), V.gather(-1, new_indices[..., 0:num, 0:num]).realize() + U = U.gather(-1, new_indices[..., 0:num, 0:num]) / (S != 0).where(S, 1).unsqueeze(-2) + V = V.gather(-1, new_indices[..., 0:num, 0:num]).realize() padded_u = Tensor.eye(q_num, dtype=U.dtype).reshape((1,) * len(b_shape) + (q_num, q_num)).expand(b_shape + (q_num, q_num)).contiguous() padded_u[..., 0:num, 0:num] = U From 526fd4ec7104eda1ef8114e64d99b2788910a8fd Mon Sep 17 00:00:00 2001 From: haofei Date: Wed, 31 Dec 2025 21:30:18 -0800 Subject: [PATCH 13/25] =?UTF-8?q?Fix=20SVD=20rank=E2=80=911=20Jacobi=20rot?= =?UTF-8?q?ation=20when=20tau=20=3D=3D=200=20(#13945)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/unit/test_linalg.py | 6 ++++++ tinygrad/tensor.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/test/unit/test_linalg.py b/test/unit/test_linalg.py index d9c5f52507..5bc33590b1 100644 --- a/test/unit/test_linalg.py +++ b/test/unit/test_linalg.py @@ -83,6 +83,12 @@ class TestLinAlg(unittest.TestCase): s_diag = (S.unsqueeze(-2) * Tensor.eye(2)) reconstruction_helper([U, s_diag, V], a) + def test_svd_rank1(self): + a = Tensor([[1.0, 1.0], [2.0, 2.0]]).realize() + U, S, V = a.svd() + np.testing.assert_allclose(S.numpy(), [np.sqrt(10), 0.0], atol=1e-4, rtol=1e-4) + reconstruction_helper([U, S.unsqueeze(-2) * Tensor.eye(2), V], a) + def test_newton_schulz(self): coefficients = [(2, -1.5, 0.5), (2.0, -1.4, 0.2, 0.2)]#these params map to the sign function sizes = [(2,2), (3,2), (2,3), (2,2,2)] diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index 736e6868de..4e650acb49 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -3672,7 +3672,7 @@ class Tensor(OpMixin): alpha, beta = U_permuted.square().sum(-2).unsqueeze(-2).split(num//2, -1) rot = gamma != 0 tau = (beta - alpha) / (2 * rot.where(gamma, 1)) - t = tau.sign() / (tau.abs() + (1 + tau.square()).sqrt()) + t = (tau != 0).where(tau.sign(), 1) / (tau.abs() + (1 + tau.square()).sqrt()) t = rot.where(t, 0) c = 1 / (1 + t.square()).sqrt() s = c * t From 1c5ed8e8b5c5a5aa83aff01c45135923cee0839d Mon Sep 17 00:00:00 2001 From: nimlgen <138685161+nimlgen@users.noreply.github.com> Date: Thu, 1 Jan 2026 14:39:21 +0300 Subject: [PATCH 14/25] am: remove doorbells from setup_ring (#13946) --- tinygrad/runtime/ops_amd.py | 8 ++++---- tinygrad/runtime/support/am/ip.py | 13 +++++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tinygrad/runtime/ops_amd.py b/tinygrad/runtime/ops_amd.py index b3a45dec96..00a3a6c680 100644 --- a/tinygrad/runtime/ops_amd.py +++ b/tinygrad/runtime/ops_amd.py @@ -834,11 +834,11 @@ class PCIIface(PCIIfaceBase): if queue_type == kfd.KFD_IOC_QUEUE_TYPE_SDMA: assert idx <= 3, "only 4 SDMA queues supported in am" - pv = self.dev_impl.sdma.setup_ring(ring_addr=ring.va_addr, ring_size=ring.size, rptr_addr=gart.va_addr+rptr, wptr_addr=gart.va_addr+wptr, - doorbell=(doorbell_index:=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + idx * 0xA * 4), pipe=0, queue=idx) + pv, doorbell_index = self.dev_impl.sdma.setup_ring(ring_addr=ring.va_addr, ring_size=ring.size, rptr_addr=gart.va_addr+rptr, + wptr_addr=gart.va_addr+wptr, pipe=0, queue=idx) else: - pv = self.dev_impl.gfx.setup_ring(ring_addr=ring.va_addr, ring_size=ring.size, rptr_addr=gart.va_addr+rptr, wptr_addr=gart.va_addr+wptr, - eop_addr=eop_buffer.va_addr, eop_size=eop_buffer.size, doorbell=(doorbell_index:=am.AMDGPU_NAVI10_DOORBELL_MEC_RING0), 
pipe=0, + pv, doorbell_index = self.dev_impl.gfx.setup_ring(ring_addr=ring.va_addr, ring_size=ring.size, rptr_addr=gart.va_addr+rptr, + wptr_addr=gart.va_addr+wptr, eop_addr=eop_buffer.va_addr, eop_size=eop_buffer.size, pipe=0, queue=int(is_aql:=(queue_type==kfd.KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)), aql=is_aql) return AMDQueueDesc(ring=ring.cpu_view().view(fmt='I'), doorbells=[self.dev_impl.doorbell64.view(doorbell_index * 8, 8, fmt='Q')], diff --git a/tinygrad/runtime/support/am/ip.py b/tinygrad/runtime/support/am/ip.py index 3745d1d793..c06cc4d55b 100644 --- a/tinygrad/runtime/support/am/ip.py +++ b/tinygrad/runtime/support/am/ip.py @@ -281,9 +281,10 @@ class AM_GFX(AM_IP): self._grbm_select(inst=xcc) for xcc in range(self.xccs): self.adev.regGCVM_CONTEXT0_CNTL.write(0, inst=xcc) - def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, eop_addr:int, eop_size:int, doorbell:int, pipe:int, queue:int, - aql:bool) -> int: + def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, eop_addr:int, eop_size:int, pipe:int, queue:int, + aql:bool) -> tuple[int, int]: self._grbm_select(me=1, pipe=pipe, queue=queue, inst=0) + doorbell = am.AMDGPU_NAVI10_DOORBELL_MEC_RING0 restore_queue = aql and self.xccs > 1 and self.adev.partial_boot and (self.adev.regCP_HQD_ACTIVE.read(inst=0) & 1) restore_ptr = (self.adev.regCP_HQD_PQ_WPTR_LO.read(inst=0) | (self.adev.regCP_HQD_PQ_WPTR_HI.read(inst=0) << 32)) if restore_queue else 0 if DEBUG >= 2 and restore_queue: print(f"am {self.adev.devfmt}: GFX queue already active, continuing from saved state {restore_ptr=:#x}.") @@ -327,7 +328,7 @@ class AM_GFX(AM_IP): self._grbm_select(inst=xcc) self.adev.reg(f"regCP_ME1_PIPE{pipe}_INT_CNTL").update(time_stamp_int_enable=1, generic0_int_enable=1, inst=xcc) - return restore_ptr // 16 + return restore_ptr // 16, doorbell def set_clockgating_state(self): if hasattr(self.adev, 'regMM_ATC_L2_MISC_CG'): self.adev.regMM_ATC_L2_MISC_CG.write(enable=1, mem_ls_enable=1) @@ -447,9 +448,9 @@ class AM_SDMA(AM_IP): time.sleep(0.01) self.adev.regGRBM_SOFT_RESET.write(0x0) - def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, doorbell:int, pipe:int, queue:int) -> int: - # Setup the ring + def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, pipe:int, queue:int) -> tuple[int, int]: reg, inst = ("regSDMA_GFX", pipe+queue*4) if self.adev.ip_ver[am.SDMA0_HWIP][:2] == (4,4) else (f"regSDMA{pipe}_QUEUE{queue}", 0) + doorbell = am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 + (pipe+queue*4) * 0xA self.sdma_reginst.append((reg, inst)) self.adev.reg(f"{reg}_MINOR_PTR_UPDATE").write(0x1, inst=inst) @@ -464,7 +465,7 @@ class AM_SDMA(AM_IP): self.adev.reg(f"{reg}_RB_CNTL").write(**({f'{self.sdma_name.lower()}_wptr_poll_enable':1} if self.adev.ip_ver[am.SDMA0_HWIP][:2]!=(4,4) else {}), rb_vmid=0, rptr_writeback_enable=1, rptr_writeback_timer=4, rb_enable=1, rb_priv=1, rb_size=(ring_size//4).bit_length()-1, inst=inst) self.adev.reg(f"{reg}_IB_CNTL").update(ib_enable=1, inst=inst) - return self.adev.reg(f"{reg}_RB_WPTR").read(inst=inst) | (self.adev.reg(f"{reg}_RB_WPTR_HI").read(inst=inst) << 32) + return self.adev.reg(f"{reg}_RB_WPTR").read(inst=inst) | (self.adev.reg(f"{reg}_RB_WPTR_HI").read(inst=inst) << 32), doorbell class AM_PSP(AM_IP): def init_sw(self): From baff10d32c21f768f5f1a61e925d278fbdfb2610 Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 1 Jan 2026 08:18:45 -0500 Subject: [PATCH 15/25] clean up Tensor.svd slices (#13948) --- tinygrad/tensor.py 
| 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index 4e650acb49..c50dcfcb6a 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -3690,10 +3690,9 @@ class Tensor(OpMixin): for _ in range(max_iterations * iterations_per_round): U, V, permute, inverse_permute = one_round_jacobi(U, V, permute, inverse_permute) #extract singular values and sort. construct U from Q S, indices = U.square().sum(-2).sqrt().sort(dim = -1, descending=True) - new_indices = Tensor.arange(num).reshape((1,) * (self.ndim - 1) + (num,)).expand(b_shape + (num, num)).contiguous() - new_indices[..., :num] = indices.reshape(b_shape + (1, num)).expand(b_shape + (num, num)) - U = U.gather(-1, new_indices[..., 0:num, 0:num]) / (S != 0).where(S, 1).unsqueeze(-2) - V = V.gather(-1, new_indices[..., 0:num, 0:num]).realize() + new_indices = indices.reshape(b_shape + (1, num)).expand(b_shape + (num, num)) + U = U.gather(-1, new_indices) / (S != 0).where(S, 1).unsqueeze(-2) + V = V.gather(-1, new_indices).realize() padded_u = Tensor.eye(q_num, dtype=U.dtype).reshape((1,) * len(b_shape) + (q_num, q_num)).expand(b_shape + (q_num, q_num)).contiguous() padded_u[..., 0:num, 0:num] = U From 6a5430ab00cc0eb467de382db2d92d31d16a68ab Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Thu, 1 Jan 2026 23:01:46 +0900 Subject: [PATCH 16/25] correct args order in mi350x gemm (#13949) --- extra/gemm/asm/gemm.s | 3 ++- extra/gemm/asm/test.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/extra/gemm/asm/gemm.s b/extra/gemm/asm/gemm.s index 1f1bb22611..dcb93630a6 100644 --- a/extra/gemm/asm/gemm.s +++ b/extra/gemm/asm/gemm.s @@ -1,6 +1,7 @@ // ** global buffers s_load_dwordx2 s[28:29], s[0:1], 0x0 // C - s_load_dwordx4 s[32:35], s[0:1], 0x8 // A, B + s_load_dwordx2 s[34:35], s[0:1], 0x08 // A + s_load_dwordx2 s[32:33], s[0:1], 0x10 // B // ** others kernel args s_load_dword s24, s[0:1], 0x18 // N s_load_dword s54, s[0:1], 0x1C // num work groups diff --git a/extra/gemm/asm/test.py b/extra/gemm/asm/test.py index 3b7dc3196e..d19f911a52 100644 --- a/extra/gemm/asm/test.py +++ b/extra/gemm/asm/test.py @@ -52,7 +52,7 @@ def get_asm_prg() -> ProgramSpec: lib = Device[Device.DEFAULT].compiler.compile(src) return ProgramSpec("gemm", src, Device.DEFAULT, ast, lib=lib, global_size=[NUM_WG, 1, 1], local_size=[THREADS_PER_WG, 1, 1], globals=[0, 1, 2], vars=[UOp.variable("SZ", 256, 8192), UOp.variable("NUM_WG", 1, 1024)]) -eis.append(ExecItem(ast, [C_asm.uop.buffer, from_torch(B).uop.buffer, from_torch(A).uop.buffer], fixedvars={"SZ":N, "NUM_WG":NUM_WG}, +eis.append(ExecItem(ast, [C_asm.uop.buffer, from_torch(A).uop.buffer, from_torch(B).uop.buffer], fixedvars={"SZ":N, "NUM_WG":NUM_WG}, prg=CompiledRunner(get_asm_prg()))) with Context(DEBUG=2): From 17ef4af72cd3d8b11123dff52698ec52d2f95f1e Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 1 Jan 2026 09:02:41 -0500 Subject: [PATCH 17/25] new ceildiv that fixed symbolic conv (#13944) * new ceildiv that fixed symbolic conv * smaller test case --- test/test_symbolic_ops.py | 21 +++++++++++++-------- test/unit/test_helpers.py | 20 ++++++++++++++++++++ tinygrad/helpers.py | 5 ++++- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/test/test_symbolic_ops.py b/test/test_symbolic_ops.py index 7ae45c24f4..6b7ea926ce 100644 --- a/test/test_symbolic_ops.py +++ b/test/test_symbolic_ops.py @@ -288,17 +288,22 @@ class TestSymbolicOps(unittest.TestCase): np.testing.assert_allclose(symbolic, 
expected, atol=1e-6, rtol=0) def test_conv2d_ceildiv_edge_case(self): - v = Variable('v', 11, 50_000) - val = 39601 - x = Tensor.randn(1, 22, 50_000)[:, :, :v.bind(val)] - weight = Tensor.randn(256, 22, 12) + # tests symbolic ceildiv in conv2d output shape calculation + # val=79 triggers the edge case where old ceildiv simplifies incorrectly: old gives floor=12, correct ceildiv=13 + v = Variable('v', 11, 100) + val = 79 + x_full = Tensor.randn(1, 8, 100) + weight = Tensor.randn(16, 8, 12) - result = x.conv2d(weight=weight, groups=1, stride=6, dilation=1, padding=(3, 3)) + # symbolic version + result = x_full[:, :, :v.bind(val)].conv2d(weight=weight, groups=1, stride=6, dilation=1, padding=(3, 3)) var_val = {v.expr: val} shape = tuple(sym_infer(s, var_val) for s in result.shape) - with self.assertRaises(AssertionError): - self.assertEqual(shape, (1, 256, 6600)) # TODO: fails if ceildiv is incorrect - # TODO: test output is correct + self.assertEqual(shape, (1, 16, 13)) + + # concrete version for comparison + expected = x_full[:, :, :val].conv2d(weight=weight, groups=1, stride=6, dilation=1, padding=(3, 3)) + np.testing.assert_allclose(result[:, :, :13].numpy(), expected.numpy(), atol=1e-5, rtol=1e-5) if __name__ == '__main__': unittest.main() diff --git a/test/unit/test_helpers.py b/test/unit/test_helpers.py index f72486ae52..ff615ee21f 100644 --- a/test/unit/test_helpers.py +++ b/test/unit/test_helpers.py @@ -2,6 +2,7 @@ import ctypes, gzip, unittest, timeit, pickle from tinygrad import Variable from tinygrad.helpers import Context, ContextVar, argfix, colored, word_wrap, is_numpy_ndarray, mv_address, get_contraction, count from tinygrad.helpers import merge_dicts, strip_parens, prod, round_up, fetch, fully_flatten, from_mv, to_mv, polyN, time_to_str, cdiv, cmod, getbits +from tinygrad.helpers import ceildiv from tinygrad.tensor import Tensor, get_shape import numpy as np @@ -120,6 +121,25 @@ class TestRoundUp(unittest.TestCase): self.assertEqual(round_up(232, 24984), 24984) self.assertEqual(round_up(24984, 232), 25056) +class TestCeilDiv(unittest.TestCase): + def test_int(self): + self.assertEqual(ceildiv(10, 3), 4) + self.assertEqual(ceildiv(9, 3), 3) + self.assertEqual(ceildiv(0, 5), 0) + self.assertEqual(ceildiv(1, 5), 1) + def test_symbolic(self): + # tests that ceildiv with UOp uses (num + amt - 1) // amt formula for non-negative num + v = Variable('v', 0, 100) + result = ceildiv(v, 6) + self.assertEqual(result.render(), "((v+5)//6)") + def test_symbolic_negative_offset(self): + # tests ceildiv(v-5, 6) which is used in conv2d output shape + # old implementation incorrectly simplified -(x//-y) to ((v+1)//6-1) for v-5 + # new implementation uses (v-5+5)//6 = v//6 which is correct + v = Variable('v', 11, 100) + result = ceildiv(v - 5, 6) + self.assertEqual(result.render(), "(v//6)") + class TestCount(unittest.TestCase): def test_count_basic(self): c = count(3) diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py index f5660f8f3f..e3abbde9e4 100644 --- a/tinygrad/helpers.py +++ b/tinygrad/helpers.py @@ -43,7 +43,10 @@ def fully_flatten(l): def fromimport(mod, frm): return getattr(__import__(mod, fromlist=[frm]), frm) def _is_balanced(s:str) -> bool: return (d := 0, all((d := d + (c == '(') - (c == ')')) >= 0 for c in s))[1] and d == 0 def strip_parens(fst:str) -> str: return fst[1:-1] if fst[:1]=='(' and fst[-1:]==')' and _is_balanced(fst[1:-1]) else fst -def ceildiv(num, amt): return int(ret) if isinstance((ret:=-(num//-amt)), float) else ret +def ceildiv(num, amt): + # use (num + amt 
- 1) // amt when num is a UOp and non-negative to avoid C/Python division mismatch + if hasattr(num, 'vmin') and num.vmin >= 0 and (amt > 0 if isinstance(amt, int) else amt.vmin > 0): return (num + amt - 1) // amt + return int(ret) if isinstance((ret:=-(num//-amt)), float) else ret def round_up(num:int, amt:int) -> int: return (num+amt-1)//amt * amt def round_down(num:int, amt:int) -> int: return -round_up(-num, amt) def next_power2(x): return 1 if x == 0 else 1 << (x - 1).bit_length() From b91b46091c0f11d0cfe6bdcf198ff3b0fb15eba9 Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 1 Jan 2026 09:25:05 -0500 Subject: [PATCH 18/25] delete test_tensor_uop (#13951) old test for shape tracker. also update tests that refer shapetracker names --- test/test_tensor_uop.py | 117 ------------------ ...est_masked_st.py => test_masked_tensor.py} | 5 +- ...hapetracker.py => test_symbolic_tensor.py} | 0 3 files changed, 1 insertion(+), 121 deletions(-) delete mode 100644 test/test_tensor_uop.py rename test/unit/{test_masked_st.py => test_masked_tensor.py} (78%) rename test/unit/{test_symbolic_shapetracker.py => test_symbolic_tensor.py} (100%) diff --git a/test/test_tensor_uop.py b/test/test_tensor_uop.py deleted file mode 100644 index 21dfe41b57..0000000000 --- a/test/test_tensor_uop.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python -import numpy as np -import unittest -from tinygrad import Tensor, Device, dtypes -from tinygrad.engine.realize import run_schedule -from tinygrad.uop.ops import UOp -from tinygrad.helpers import SPLIT_REDUCEOP - -class TestTensorUOp(unittest.TestCase): - def test_fromcpu_shape_tracker(self): - def helper(a: np.ndarray): - print(a.shape, a.strides, a.flags.c_contiguous) - b = Tensor(a).uop - assert b.shape == a.shape - np.testing.assert_equal(a, Tensor(b).numpy()) - - for ndims in range(1, 4): - a = np.random.randn(*(4,)*ndims).astype(np.float32) - for stride in [-2, 1, 2]: - for start in [0, 1]: - helper(a[(slice(start, None, stride),)*ndims]) - - def test_shuffle_pad_ops_cmpeq(self): - y = Tensor([1]).cat(Tensor([1]) == 0).numpy() - z = Tensor([1, 0]).numpy() - np.testing.assert_allclose(y, z) - - def test_shuffle_pad_ops_div(self): - y = Tensor([1]).cat(Tensor([1]).div(Tensor([2.0]))).numpy() - z = Tensor([1, 0.5]).numpy() - np.testing.assert_allclose(y, z) - - def test_shuffle_pad_ops_log(self): - y = Tensor([1]).cat(Tensor([1]).log()).numpy() - z = Tensor([1, 0]).numpy() - np.testing.assert_allclose(y, z) - - def test_shuffle_pad_ops_exp(self): - y = Tensor([1]).cat(Tensor([1]).exp()).numpy() - z = Tensor([1, np.e]).numpy() - np.testing.assert_allclose(y, z) - - def test_device_0_is_the_same_device(self): - a = Tensor([1, 2, 3], f"{Device.DEFAULT}") - b = Tensor([1, 2, 3], f"{Device.DEFAULT}:0") - assert a.device == b.device - - def test_shrink_const_into_zero(self): - # regression test to make sure the shapetracker is preserved - a = Tensor.zeros(4,4,4).shrink((None, (0,0), None)) - b = Tensor.zeros(4,1,4) - c = a.cat(b, dim=1) - np.testing.assert_allclose(c.numpy(), np.concatenate((a.numpy(), b.numpy()), axis=1)) - - def test_shrink_const_then_cast(self): - # regression test to make sure the shapetracker is preserved - a = Tensor.zeros(4,4,4).shrink((None, (0,0), None)).cast(dtypes.int32) - b = Tensor.zeros(4,1,4) - c = a.cat(b, dim=1) - np.testing.assert_allclose(c.numpy(), np.concatenate((a.numpy(), b.numpy()), axis=1)) - - def test_const_dtype(self): - lb: UOp = Tensor([1], dtype=dtypes.int).uop - assert lb.const_like(1).base.arg == 1 - assert 
type(lb.const_like(1).base.arg) is int - - lb: UOp = Tensor([1], dtype=dtypes.float).uop - assert lb.const_like(1).base.arg == 1.0 - assert type(lb.const_like(1).base.arg) is float - - def test_contiguous_alu(self): - a = Tensor.randn(2, 2).realize() - b = Tensor.randn(2, 2).realize() - add = (a+b).contiguous() - out = add+2 - sched = out.schedule() - self.assertEqual(len(sched), 2) - run_schedule(sched) - np.testing.assert_allclose(out.numpy(), a.numpy()+b.numpy()+2) - - # NOTE: contiguous on a buffer collapses - @unittest.skip("contiguous on a buffer no longer collapses") - def test_contiguous_empty(self): - empty = Tensor.empty(1).contiguous() - sched = empty.schedule() - self.assertEqual(len(sched), 0) - - def test_contiguous_folded_alu(self): - a = Tensor.empty(8, 8) - # NOTE: the buffer for mul_0 late folds to just a CONST - mul_0 = a*0 - out = mul_0.shrink(((4, 8), (0, 8))).contiguous() - out.realize() - self.assertEqual(out.tolist(), Tensor.zeros(4, 8).tolist()) - -@unittest.skipUnless(SPLIT_REDUCEOP, "only for SPLIT_REDUCEOP") -class TestReduceOp(unittest.TestCase): - def test_no_split_reduce_kernel(self): - a = Tensor.rand(4, 4).realize() - a = a.sum() - sched = a.schedule() - assert len(sched) == 1 - - def test_split_reduce_kernel_dim0(self): - a = Tensor.rand(256, 255).realize() - a = a.sum() - sched = a.schedule() - assert len(sched) == 2 - - def test_split_reduce_kernel_dim1(self): - a = Tensor.rand(255, 256).realize() - a = a.sum() - sched = a.schedule() - assert len(sched) == 2 - -if __name__ == "__main__": - unittest.main() diff --git a/test/unit/test_masked_st.py b/test/unit/test_masked_tensor.py similarity index 78% rename from test/unit/test_masked_st.py rename to test/unit/test_masked_tensor.py index ce88a710a1..45f379e36c 100644 --- a/test/unit/test_masked_st.py +++ b/test/unit/test_masked_tensor.py @@ -1,13 +1,12 @@ import unittest from tinygrad.tensor import Tensor -class TestMaskedShapeTracker(unittest.TestCase): +class TestMaskedTensor(unittest.TestCase): def test_mul_masked(self): a = Tensor([1,1,1,1,1]) b = Tensor([1,1]).pad(((0,3),)) c = a*b assert c.shape == a.shape - #assert c.uop.st.views[0].mask is not None ret = c.data() assert ret.tolist() == [1.0, 1.0, 0.0, 0.0, 0.0] @@ -16,7 +15,6 @@ class TestMaskedShapeTracker(unittest.TestCase): b = Tensor([1,1]).pad(((0,3),)) c = a*b assert c.shape == a.shape - #assert c.uop.st.views[0].mask is not None ret = c.data() assert ret.tolist() == [1.0, 1.0, 0.0, 0.0, 0.0] @@ -24,7 +22,6 @@ class TestMaskedShapeTracker(unittest.TestCase): a = Tensor([1,1]).pad(((0,2),)) b = Tensor([1,1]).pad(((0,2),)) c = a+b - #assert c.uop.st.views[0].mask is not None ret = c.data() assert ret.tolist() == [2.0, 2.0, 0.0, 0.0] diff --git a/test/unit/test_symbolic_shapetracker.py b/test/unit/test_symbolic_tensor.py similarity index 100% rename from test/unit/test_symbolic_shapetracker.py rename to test/unit/test_symbolic_tensor.py From c69470be52c858daf8308e037e064f45f1eda330 Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 1 Jan 2026 09:41:07 -0500 Subject: [PATCH 19/25] fix test_symbolic_arange_sym_step (#13952) --- test/test_tensor_variable.py | 14 ++++++++++++-- tinygrad/tensor.py | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/test/test_tensor_variable.py b/test/test_tensor_variable.py index 643b1c5dec..b05529c71c 100644 --- a/test/test_tensor_variable.py +++ b/test/test_tensor_variable.py @@ -73,8 +73,6 @@ class TestTensorVariable(unittest.TestCase): ret = Tensor.arange(vv.bind(4), 7) 
self.assertListEqual(ret[:3].tolist(), [4,5,6]) - # TODO: add vmin/vmax pattern for symbolic denominator - @unittest.expectedFailure def test_symbolic_arange_sym_step(self): vv = Variable("step", 1, 3) ret = Tensor.arange(0, 10, vv.bind(2)) @@ -86,6 +84,18 @@ class TestTensorVariable(unittest.TestCase): ret = Tensor.arange(begin.bind(4), end.bind(7)) self.assertListEqual(ret[:3].tolist(), [4,5,6]) + def test_symbolic_arange_three_vars(self): + begin = Variable("b", 0, 5) + end = Variable("e", 10, 20) + step = Variable("s", 1, 3) + ret = Tensor.arange(begin.bind(2), end.bind(14), step.bind(3)) + self.assertListEqual(ret[:4].tolist(), [2,5,8,11]) + + def test_symbolic_full(self): + vv = Variable("x", 1, 10).bind(5) + t = Tensor.full((3,), vv) + self.assertListEqual(t.tolist(), [5,5,5]) + def test_variable_empty(self): v = Variable("i", 1, 10) # TODO: Tensor creation from unbound variable should assert diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index c50dcfcb6a..652fbe8511 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -127,7 +127,7 @@ class Tensor(OpMixin): # create a UOp from the different types of inputs if isinstance(data, UOp): - assert _dtype is None or _dtype==data.dtype, f"dtype doesn't match ({_dtype} vs {data.dtype}), and casting isn't supported" + assert _dtype is None or _dtype==data.dtype or data.dtype==dtypes.index, f"dtype mismatch: {_dtype} vs {data.dtype}" # if data is dtype.index that means that this is a symbolic int and we need to lower it to something we can make a Tensor out of if data.dtype==dtypes.index: data = _index_to_concrete_int(data) if data.op is Ops.BIND: # type: ignore # mypy type narrowing is bugged here From c0f52c9dcb7fb512d209811afbb23fa8b06ad386 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Fri, 2 Jan 2026 00:10:22 +0900 Subject: [PATCH 20/25] split assembly gemm to per arch directory (#13953) --- extra/gemm/asm/{ => cdna}/gemm.s | 0 extra/gemm/asm/{ => cdna}/template.s | 0 extra/gemm/asm/{ => cdna}/test.py | 0 extra/gemm/asm/unpack_kd.py | 10 +++++----- 4 files changed, 5 insertions(+), 5 deletions(-) rename extra/gemm/asm/{ => cdna}/gemm.s (100%) rename extra/gemm/asm/{ => cdna}/template.s (100%) rename extra/gemm/asm/{ => cdna}/test.py (100%) diff --git a/extra/gemm/asm/gemm.s b/extra/gemm/asm/cdna/gemm.s similarity index 100% rename from extra/gemm/asm/gemm.s rename to extra/gemm/asm/cdna/gemm.s diff --git a/extra/gemm/asm/template.s b/extra/gemm/asm/cdna/template.s similarity index 100% rename from extra/gemm/asm/template.s rename to extra/gemm/asm/cdna/template.s diff --git a/extra/gemm/asm/test.py b/extra/gemm/asm/cdna/test.py similarity index 100% rename from extra/gemm/asm/test.py rename to extra/gemm/asm/cdna/test.py diff --git a/extra/gemm/asm/unpack_kd.py b/extra/gemm/asm/unpack_kd.py index 150e4c195b..a1447220ff 100644 --- a/extra/gemm/asm/unpack_kd.py +++ b/extra/gemm/asm/unpack_kd.py @@ -1,12 +1,12 @@ -# unpack the complete kernel descriptor of an amdgpu ELF of for gfx950 +# unpack the complete kernel descriptor of an amdgpu ELF # https://rocm.docs.amd.com/projects/llvm-project/en/latest/LLVM/llvm/html/AMDGPUUsage.html#code-object-v3-kernel-descriptor -import struct, pathlib +import struct, pathlib, sys from tinygrad.runtime.support.elf import elf_loader def bits(x, lo, hi): return (x >> lo) & ((1 << (hi - lo + 1)) - 1) def assert_zero(x, lo, hi): assert bits(x, lo, hi) == 0 -with open(fp:=pathlib.Path(__file__).parent/"lib", "rb") as f: +with open(sys.argv[1], "rb") as f: lib 
= f.read() image, sections, relocs = elf_loader(lib) @@ -49,7 +49,7 @@ print("COMPUTE_PGM_RSRC3: 0x%08x" % pgm_rsrc3) print("COMPUTE_PGM_RSRC1: 0x%08x" % pgm_rsrc1) print("COMPUTE_PGM_RSRC2: 0x%08x" % pgm_rsrc2) -# rsrc 3 +# rsrc 3 (gfx950) accum_offset_raw = bits(pgm_rsrc3, 0, 5) assert_zero(pgm_rsrc3, 6, 15) @@ -169,10 +169,10 @@ assert_zero(desc, 458, 459) uses_dynamic_stack = bits(desc, 459, 460) print("DESC.USES_DYNAMIC_STACK:", uses_dynamic_stack) +# gfx950 only assert_zero(desc, 460, 463) kernarg_preload_spec_length = bits(desc, 464, 470) print("DESC.KERNARG_PRELOAD_SPEC_LENGTH:", kernarg_preload_spec_length) - kernarg_preload_spec_offset = bits(desc, 471, 479) print("DESC.KERNARG_PRELOAD_SPEC_OFFSET:", kernarg_preload_spec_offset) From 9726500de851398248d9da3f9ff31b0b9240fecf Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Fri, 2 Jan 2026 00:12:01 +0900 Subject: [PATCH 21/25] enable using assembly in Tensor.custom_kernel (#13895) --- extra/remu/test/hwtest.py | 21 +++++++++++---------- tinygrad/codegen/__init__.py | 6 ++++-- tinygrad/engine/realize.py | 2 +- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/extra/remu/test/hwtest.py b/extra/remu/test/hwtest.py index e740a1b4a2..1878427d79 100644 --- a/extra/remu/test/hwtest.py +++ b/extra/remu/test/hwtest.py @@ -3,18 +3,21 @@ import numpy as np import unittest -import subprocess, struct, math, textwrap +import subprocess, struct, math, textwrap, functools from tinygrad import Tensor, dtypes, Device, UOp -from tinygrad.uop.ops import Ops +from tinygrad.uop.ops import Ops, KernelInfo from tinygrad.helpers import getenv -from tinygrad.runtime.support.compiler_amd import amdgpu_disassemble -from tinygrad.renderer import ProgramSpec -from tinygrad.engine.realize import CompiledRunner from extra.assembly.amd.autogen.rdna3.ins import * from extra.assembly.amd.asm import waitcnt from test.testextra.test_cfg_viz import template +def custom_src(out:UOp, src:str, device:str, n_threads:int=1, n_workgroups:int=1) -> UOp: + lidx = UOp.special(n_threads, "lidx0") + gidx = UOp.special(n_workgroups, "gidx0") + sink = UOp.sink(out, lidx, gidx, arg=KernelInfo(name="test")) + return UOp(Ops.PROGRAM, src=(sink, UOp(Ops.DEVICE, arg=device), UOp(Ops.LINEAR, src=(*sink.src, sink)), UOp(Ops.SOURCE, arg=src))) + def get_output(asm:list, n_threads:int=1, vdst:VGPR=v[1]): out = Tensor([0]*n_threads, dtype=dtypes.uint32).realize() src = "\n".join(inst.disasm() for inst in [ @@ -26,11 +29,9 @@ def get_output(asm:list, n_threads:int=1, vdst:VGPR=v[1]): global_store_b32(addr=v[0], data=vdst, saddr=s[0:1]), s_endpgm() ]) - prg = ProgramSpec("test", template.replace("fn_name", "test").replace("INSTRUCTION", textwrap.dedent(src)), Device.DEFAULT, UOp(Ops.SINK), - global_size=[1, 1, 1], local_size=[n_threads, 1, 1], globals=[0]) - car = CompiledRunner(prg) - if getenv("PRINT_ASM"): amdgpu_disassemble(car.lib) - car([out.uop.buffer], {}, wait=True) + src = template.replace("fn_name", "test").replace("INSTRUCTION", textwrap.dedent(src)) + out = Tensor.custom_kernel(out, fxn=functools.partial(custom_src, src=src, device=out.device, n_threads=n_threads))[0] + out.realize() return out.tolist() def f16_to_bits(x:float) -> int: return struct.unpack(' Program if ast.arg is None: ast = ast.replace(arg=KernelInfo()) # rewrite to prg - full_sink = full_rewrite_to_sink(ast, renderer, optimize=ast.tag is None) - prg = UOp(Ops.PROGRAM, src=(full_sink, UOp(Ops.DEVICE, arg=renderer.device))) + if ast.op is Ops.PROGRAM: prg = ast + 
else: + full_sink = full_rewrite_to_sink(ast, renderer, optimize=ast.tag is None) + prg = UOp(Ops.PROGRAM, src=(full_sink, UOp(Ops.DEVICE, arg=renderer.device))) prg = graph_rewrite(prg, pm_to_program, ctx=renderer, name="linearize/render") # create the ProgramSpec diff --git a/tinygrad/engine/realize.py b/tinygrad/engine/realize.py index f6d90dfbbf..de89a33e02 100644 --- a/tinygrad/engine/realize.py +++ b/tinygrad/engine/realize.py @@ -125,7 +125,7 @@ def get_runner(device:str, ast:UOp) -> CompiledRunner: # NOTE: ctx is the buffers si_lowerer = PatternMatcher([ - (UPat(Ops.SINK, name="sink"), lambda ctx,sink: get_runner(ctx[0].device, sink)), + (UPat((Ops.SINK, Ops.PROGRAM), name="sink"), lambda ctx,sink: get_runner(ctx[0].device, sink)), (UPat(Ops.BUFFER_VIEW), lambda ctx: ViewOp(ctx[0])), (UPat(Ops.COPY, name="copy"), lambda ctx,copy: (BufferXfer(ctx[0].nbytes, ctx[0].device, ctx[1].device) \ if hasattr(Device[ctx[0].device].allocator, '_transfer') and all_same([x.device.split(":")[0] for x in ctx]) \ From 24723327ac407edcc42117117db60ec327630fe5 Mon Sep 17 00:00:00 2001 From: b1tg <33436708+b1tg@users.noreply.github.com> Date: Thu, 1 Jan 2026 23:25:08 +0800 Subject: [PATCH 22/25] fix tc_up in search (#13438) * tensor_core is missing from Scheduler * test upcast max --------- Co-authored-by: chenyu --- extra/optimization/test_beam_search.py | 30 ++++++++++++++++++++++++-- tinygrad/codegen/opt/postrange.py | 2 ++ tinygrad/codegen/opt/search.py | 4 ++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/extra/optimization/test_beam_search.py b/extra/optimization/test_beam_search.py index f493ec48eb..36aba141b6 100644 --- a/extra/optimization/test_beam_search.py +++ b/extra/optimization/test_beam_search.py @@ -1,9 +1,13 @@ import unittest import numpy as np -from tinygrad.helpers import BEAM, Timing, CI, Context -from tinygrad import Variable, Tensor +from tinygrad.helpers import BEAM, Timing, CI, prod +from tinygrad import Variable, Device, Tensor from tinygrad.nn import Conv2d +from tinygrad.uop.ops import AxisType +from tinygrad.codegen.opt import Opt, OptOps +from tinygrad.codegen.opt.postrange import Scheduler +from tinygrad.codegen.opt.search import get_kernel_actions def rand(*shape): return Tensor(np.random.rand(*shape).astype(np.float32)) @@ -75,5 +79,27 @@ class TestBeamSearch(unittest.TestCase): a = (a + a) * a a.realize() + @unittest.skipUnless(Device[Device.DEFAULT].renderer.tensor_cores, "test requires tensor cores") + def test_tc_up(self): + tc = Device[Device.DEFAULT].renderer.tensor_cores[0] + size = max(tc.dims[0], tc.dims[1]) * 8 + a, b = Tensor.rand(size, size, dtype=tc.dtype_in), Tensor.rand(size, size, dtype=tc.dtype_in) + ast = a.matmul(b, dtype=tc.dtype_out).schedule()[-1].ast + s = Scheduler(ast, Device[Device.DEFAULT].renderer) + s.apply_opt(Opt(OptOps.TC, 0, (-1, 0, 1))) + up = prod([x for x, t in zip(s.full_shape, s.axis_types) if t in (AxisType.UPCAST, AxisType.UNROLL)]) + actions = get_kernel_actions(s, include_0=False, max_up=int(up)) + upcasted = [s for s in actions.values() if any(opt.op in (OptOps.UPCAST, OptOps.UNROLL) for opt in s.applied_opts)] + assert len(upcasted) > 0, f"expected upcast/unroll actions after TC with max_up={up}, but got none" + + def test_max_up(self): + a = Tensor.rand(16, 16) + ast = a.schedule()[-1].ast + s = Scheduler(ast, Device[Device.DEFAULT].renderer) + for max_up in (2, 4): + actions = get_kernel_actions(s, include_0=False, max_up=max_up) + for up_opts in [s.applied_opts for s in actions.values() if any(opt.op in 
(OptOps.UPCAST, OptOps.UNROLL) for opt in s.applied_opts)]: + assert len([opt for opt in up_opts if opt.arg > max_up]) == 0 and len([op for op in up_opts if op.arg <= max_up]) > 0 + if __name__ == '__main__': unittest.main() diff --git a/tinygrad/codegen/opt/postrange.py b/tinygrad/codegen/opt/postrange.py index b00bd5bea3..fd86308a95 100644 --- a/tinygrad/codegen/opt/postrange.py +++ b/tinygrad/codegen/opt/postrange.py @@ -45,6 +45,7 @@ class Scheduler: ret = Scheduler(self.ast, self.ren) ret.dont_use_locals = self.dont_use_locals ret.applied_opts = self.applied_opts[:] + if hasattr(self, 'tensor_core'): ret.tensor_core = self.tensor_core return ret kernel_cnt: Final[defaultdict[str, int]] = defaultdict(int) @@ -307,6 +308,7 @@ class Scheduler: reduce_ranges = [x for x in UOp.sink(*reduceop.src[1:]).toposort() if x.op is Ops.RANGE and x.arg[0] not in tc_reduce_axes] if len(reduce_ranges): tc_uop = UOp(Ops.REDUCE, tc_uop.dtype, (tc_uop,)+tuple(reduce_ranges), Ops.ADD) self.ast = self.ast.substitute({reduceop: tc_uop}) + self.tensor_core = tc return axes return None diff --git a/tinygrad/codegen/opt/search.py b/tinygrad/codegen/opt/search.py index 18d7ea49bc..13e86e8924 100644 --- a/tinygrad/codegen/opt/search.py +++ b/tinygrad/codegen/opt/search.py @@ -93,8 +93,8 @@ def _ensure_buffer_alloc(bufs:list[Buffer]) -> list[Buffer]: return [buf.ensure_ # *** external API *** # get dictionary of all possible actions -def get_kernel_actions(s:Scheduler, include_0=True) -> dict[int, Scheduler]: - acted, max_up, max_lcl = {0:s} if include_0 else {}, getenv("BEAM_UPCAST_MAX", 256), getenv("BEAM_LOCAL_MAX", 1024) +def get_kernel_actions(s:Scheduler, include_0=True, max_up:int|None=None) -> dict[int, Scheduler]: + acted, max_up, max_lcl = {0:s} if include_0 else {}, getenv("BEAM_UPCAST_MAX", 256) if max_up is None else max_up, getenv("BEAM_LOCAL_MAX", 1024) kernel_actions = actions.copy() for i,a in enumerate(kernel_actions): From ce84a231427716949ade4e454ef081a773868c08 Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 1 Jan 2026 10:55:36 -0500 Subject: [PATCH 23/25] remove tee in benchmark (#13954) --- .github/workflows/benchmark.yml | 306 ++++++++++---------------------- 1 file changed, 95 insertions(+), 211 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index e3f07acf9d..1d8bbc182e 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -49,19 +49,19 @@ jobs: - name: Print macOS version run: sw_vers - name: Run Stable Diffusion - run: BENCHMARK_LOG=stable_diffusion JIT=1 ASSERT_MIN_STEP_TIME=720 python3.11 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing | tee sd.txt + run: BENCHMARK_LOG=stable_diffusion JIT=1 ASSERT_MIN_STEP_TIME=720 python3.11 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing - name: Run Stable Diffusion without fp16 - run: BENCHMARK_LOG=stable_diffusion_fp32 JIT=1 ASSERT_MIN_STEP_TIME=720 python3.11 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd_no_fp16.txt + run: BENCHMARK_LOG=stable_diffusion_fp32 JIT=1 ASSERT_MIN_STEP_TIME=720 python3.11 examples/stable_diffusion.py --seed 0 --noshow --timing - name: Run Stable Diffusion v2 # TODO: very slow step time - run: BENCHMARK_LOG=stable_diffusion_v2 JIT=1 ASSERT_MIN_STEP_TIME=4500 python3.11 examples/sdv2.py --fp16 --seed 0 --noshow --timing | tee sdv2.txt + run: BENCHMARK_LOG=stable_diffusion_v2 JIT=1 ASSERT_MIN_STEP_TIME=4500 python3.11 examples/sdv2.py --fp16 --seed 0 --noshow --timing # process replay can't 
capture this, the graph is too large - name: Run SDXL - run: BENCHMARK_LOG=stable_diffusion_xl ASSERT_MIN_STEP_TIME=5000 CAPTURE_PROCESS_REPLAY=0 JIT=1 python3.11 examples/sdxl.py --seed 0 --noshow --timing | tee sdxl.txt + run: BENCHMARK_LOG=stable_diffusion_xl ASSERT_MIN_STEP_TIME=5000 CAPTURE_PROCESS_REPLAY=0 JIT=1 python3.11 examples/sdxl.py --seed 0 --noshow --timing - name: Run model inference benchmark run: METAL=1 NOCLANG=1 python3.11 test/external/external_model_benchmark.py - name: Test speed vs torch - run: BIG=2 MPS=1 python3.11 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt + run: BIG=2 MPS=1 python3.11 test/speed/external_test_speed_v_torch.py - name: Test tensor cores run: METAL=1 python3.11 test/opt/test_tensor_cores.py - name: Test AMX tensor cores @@ -71,84 +71,59 @@ jobs: DEBUG=2 CPU=1 CPU_LLVM=0 AMX=1 python3.11 test/opt/test_gen_float4.py TestFloat4.test_float4_multidim_amx TestFloat4.test_float4_multidim_unaligned_load_amx DEBUG=2 CPU=1 CPU_LLVM=1 AMX=1 python3.11 test/opt/test_gen_float4.py TestFloat4.test_float4_multidim_amx TestFloat4.test_float4_multidim_unaligned_load_amx - name: Run Tensor Core GEMM (float) - run: DEBUG=2 SHOULD_USE_TC=1 python3.11 extra/gemm/simple_matmul.py | tee matmul.txt + run: DEBUG=2 SHOULD_USE_TC=1 python3.11 extra/gemm/simple_matmul.py - name: Run Tensor Core GEMM (half) - run: DEBUG=2 SHOULD_USE_TC=1 HALF=1 python3.11 extra/gemm/simple_matmul.py | tee matmul_half.txt + run: DEBUG=2 SHOULD_USE_TC=1 HALF=1 python3.11 extra/gemm/simple_matmul.py - name: Run Tensor Core GEMM (bfloat16) - run: DEBUG=2 SHOULD_USE_TC=1 BFLOAT16=1 python3.11 extra/gemm/simple_matmul.py | tee matmul_bfloat16.txt + run: DEBUG=2 SHOULD_USE_TC=1 BFLOAT16=1 python3.11 extra/gemm/simple_matmul.py - name: Fuzz Padded Tensor Core GEMM run: METAL=1 M_START=6 M_STOP=10 M_STEP=1 N_START=6 N_STOP=10 N_STEP=1 K_START=6 K_STOP=24 K_STEP=1 TC_OPT=2 DEBUG=2 python3.11 ./extra/gemm/fuzz_matmul.py - name: Run LLaMA run: | - BENCHMARK_LOG=llama_nojit JIT=0 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt - BENCHMARK_LOG=llama JIT=1 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt + BENCHMARK_LOG=llama_nojit JIT=0 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing + BENCHMARK_LOG=llama JIT=1 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run LLaMA with BEAM - run: BENCHMARK_LOG=llama_beam JITBEAM=2 IGNORE_BEAM_CACHE=1 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_beam.txt + run: BENCHMARK_LOG=llama_beam JITBEAM=2 IGNORE_BEAM_CACHE=1 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run quantized LLaMA run: | - BENCHMARK_LOG=llama_int8 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing --quantize int8 | tee llama_int8.txt - BENCHMARK_LOG=llama_nf4 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing --quantize nf4 | tee llama_nf4.txt + BENCHMARK_LOG=llama_int8 python3.11 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing --quantize int8 + BENCHMARK_LOG=llama_nf4 python3.11 examples/llama.py --gen 1 --prompt "Hello." 
--count 10 --temperature 0 --timing --quantize nf4 - name: Run quantized LLaMA3 run: | - BENCHMARK_LOG=llama3_int8 python3.11 examples/llama3.py --size 8B --temperature 0 --benchmark --quantize int8 | tee llama3_int8.txt - BENCHMARK_LOG=llama3_nf4 python3.11 examples/llama3.py --size 8B --temperature 0 --benchmark --quantize nf4 | tee llama3_nf4.txt + BENCHMARK_LOG=llama3_int8 python3.11 examples/llama3.py --size 8B --temperature 0 --benchmark --quantize int8 + BENCHMARK_LOG=llama3_nf4 python3.11 examples/llama3.py --size 8B --temperature 0 --benchmark --quantize nf4 #- name: Run LLaMA 7B on 4 (virtual) GPUs - # run: python3.11 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_four_gpu.txt + # run: python3.11 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run GPT2 run: | - BENCHMARK_LOG=gpt2_nojit JIT=0 python3.11 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt - BENCHMARK_LOG=gpt2 JIT=1 ASSERT_MIN_STEP_TIME=13 python3.11 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt + BENCHMARK_LOG=gpt2_nojit JIT=0 python3.11 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing + BENCHMARK_LOG=gpt2 JIT=1 ASSERT_MIN_STEP_TIME=13 python3.11 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing - name: Run GPT2 w HALF - run: BENCHMARK_LOG=gpt2_half HALF=1 python3.11 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt + run: BENCHMARK_LOG=gpt2_half HALF=1 python3.11 examples/gpt2.py --count 10 --temperature 0 --timing - name: Run GPT2 w HALF/BEAM - run: BENCHMARK_LOG=gpt2_half_beam HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3.11 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt + run: BENCHMARK_LOG=gpt2_half_beam HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3.11 examples/gpt2.py --count 10 --temperature 0 --timing - name: Run OLMoE run: BENCHMARK_LOG=olmoe python3.11 examples/olmoe.py - name: Train MNIST - run: time PYTHONPATH=. TARGET_EVAL_ACC_PCT=96.0 python3.11 examples/beautiful_mnist.py | tee beautiful_mnist.txt + run: time PYTHONPATH=. TARGET_EVAL_ACC_PCT=96.0 python3.11 examples/beautiful_mnist.py # NOTE: this is failing in CI. 
it is not failing on my machine and I don't really have a way to debug it # the error is "RuntimeError: Internal Error (0000000e:Internal Error)" #- name: Run 10 CIFAR training steps - # run: BENCHMARK_LOG=cifar_10steps JIT=1 ASSERT_MIN_STEP_TIME=3000 STEPS=10 python3.11 examples/hlb_cifar10.py | tee train_cifar.txt + # run: BENCHMARK_LOG=cifar_10steps JIT=1 ASSERT_MIN_STEP_TIME=3000 STEPS=10 python3.11 examples/hlb_cifar10.py #- name: Run 10 CIFAR training steps w HALF - # run: BENCHMARK_LOG=cifar_10steps_half JIT=2 ASSERT_MIN_STEP_TIME=3000 STEPS=10 DEFAULT_FLOAT=HALF python3.11 examples/hlb_cifar10.py | tee train_cifar_half.txt + # run: BENCHMARK_LOG=cifar_10steps_half JIT=2 ASSERT_MIN_STEP_TIME=3000 STEPS=10 DEFAULT_FLOAT=HALF python3.11 examples/hlb_cifar10.py #- name: Run 10 CIFAR training steps w BF16 - # run: STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3.11 examples/hlb_cifar10.py | tee train_cifar_bf16.txt + # run: STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3.11 examples/hlb_cifar10.py # TODO: too slow # - name: Run 10 CIFAR training steps w winograd - # run: BENCHMARK_LOG=cifar_10steps_wino JIT=1 ASSERT_MIN_STEP_TIME=150 WINO=1 STEPS=10 python3.11 examples/hlb_cifar10.py | tee train_cifar_wino.txt + # run: BENCHMARK_LOG=cifar_10steps_wino JIT=1 ASSERT_MIN_STEP_TIME=150 WINO=1 STEPS=10 python3.11 examples/hlb_cifar10.py - uses: actions/upload-artifact@v4 with: name: Speed (Mac) path: | onnx_inference_speed.csv - torch_speed.txt - llama_unjitted.txt - llama_jitted.txt - llama_beam.txt - llama_int8.txt - llama_nf4.txt - llama3_int8.txt - llama3_nf4.txt - llama_four_gpu.txt - gpt2_unjitted.txt - gpt2_jitted.txt - gpt2_half.txt - gpt2_half_beam.txt - matmul.txt - matmul_half.txt - matmul_bfloat16.txt - sd.txt - sd_no_fp16.txt - sdv2.txt - sdxl.txt - beautiful_mnist.txt - train_cifar.txt - train_cifar_half.txt - train_cifar_bf16.txt - train_cifar_wino.txt - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. 
python3.11 process_replay.py @@ -215,7 +190,7 @@ jobs: - name: Run model inference benchmark run: NV=1 CAPTURE_PROCESS_REPLAY=0 NOCLANG=1 python3 test/external/external_model_benchmark.py - name: Test speed vs torch - run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt + run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py - name: Test speed vs theoretical run: NV=1 IGNORE_BEAM_CACHE=1 CCACHE=0 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20 - name: Test benchmark allreduce @@ -226,79 +201,58 @@ jobs: NV=1 NV_PTX=1 ALLOW_TF32=1 python3 test/opt/test_tensor_cores.py - name: Run Tensor Core GEMM (CUDA) run: | - CUDA=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt - CUDA=1 SHOULD_USE_TC=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_bfloat16.txt - CUDA=1 SHOULD_USE_TC=1 ALLOW_TF32=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py | tee matmul_tf32.txt - CUDA=1 SHOULD_USE_TC=1 FP8E4M3=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_fp8.txt + CUDA=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py + CUDA=1 SHOULD_USE_TC=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py + CUDA=1 SHOULD_USE_TC=1 ALLOW_TF32=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py + CUDA=1 SHOULD_USE_TC=1 FP8E4M3=1 DEBUG=2 python3 extra/gemm/simple_matmul.py - name: Run Tensor Core GEMM (PTX) - run: NV=1 NV_PTX=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_ptx.txt + run: NV=1 NV_PTX=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py - name: Run Tensor Core GEMM (NV) - run: NV=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_nv.txt + run: NV=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py - name: Test NV=1 run: DEBUG=2 NV=1 python -m pytest -rA test/test_tiny.py - name: Test CUDA=1 run: DEBUG=2 CUDA=1 python -m pytest -rA test/test_tiny.py - name: Run Stable Diffusion - run: BENCHMARK_LOG=stable_diffusion NV=1 python3 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing | tee sd.txt + run: BENCHMARK_LOG=stable_diffusion NV=1 python3 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing # TODO: too slow # - name: Run SDXL - # run: BENCHMARK_LOG=stable_diffusion_xl ASSERT_MIN_STEP_TIME=2000 CAPTURE_PROCESS_REPLAY=0 NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/sdxl.py --seed 0 --noshow --timing | tee sdxl.txt + # run: BENCHMARK_LOG=stable_diffusion_xl ASSERT_MIN_STEP_TIME=2000 CAPTURE_PROCESS_REPLAY=0 NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/sdxl.py --seed 0 --noshow --timing - name: Run LLaMA run: | - BENCHMARK_LOG=llama_nojit NV=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt - BENCHMARK_LOG=llama NV=1 JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt + BENCHMARK_LOG=llama_nojit NV=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing + BENCHMARK_LOG=llama NV=1 JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run LLaMA with BEAM - run: BENCHMARK_LOG=llama_beam NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama.py --gen 1 --prompt "Hello." 
--count 10 --temperature 0 --timing | tee llama_beam.txt + run: BENCHMARK_LOG=llama_beam NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing # - name: Run LLaMA 7B on 4 GPUs - # run: NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_four_gpu.txt + # run: NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0 --timing # - name: Run LLaMA 7B on 6 GPUs - # run: NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_six_gpu.txt + # run: NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run LLaMA-3 8B BEAM - run: BENCHMARK_LOG=llama3_beam NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama3.py --size 8B --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_beam.txt + run: BENCHMARK_LOG=llama3_beam NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama3.py --size 8B --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 - name: Run LLaMA-3 8B on 4 GPUs with BEAM - run: BENCHMARK_LOG=llama3_beam_4gpu NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_four_gpu.txt + run: BENCHMARK_LOG=llama3_beam_4gpu NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 - name: Run quantized LLaMA3 - run: BENCHMARK_LOG=llama3_fp8 python3 examples/llama3.py --size 8B --model weights/LLaMA-3/8B-SF-DPO/ --temperature 0 --benchmark --quantize fp8 | tee llama3_fp8.txt + run: BENCHMARK_LOG=llama3_fp8 python3 examples/llama3.py --size 8B --model weights/LLaMA-3/8B-SF-DPO/ --temperature 0 --benchmark --quantize fp8 # - name: Run LLaMA-3 8B on 6 GPUs - # run: NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_six_gpu.txt + # run: NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 # - name: Run LLaMA-2 70B - # run: NV=1 CAPTURE_PROCESS_REPLAY=0 MAX_CONTEXT=256 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_2_70B.txt + # run: NV=1 CAPTURE_PROCESS_REPLAY=0 MAX_CONTEXT=256 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run Mixtral 8x7B - run: time BENCHMARK_LOG=mixtral NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/mixtral.py --temperature 0 --count 10 --timing | tee mixtral.txt + run: time BENCHMARK_LOG=mixtral NV=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/mixtral.py --temperature 0 --count 10 --timing - name: Run GPT2 run: | - BENCHMARK_LOG=gpt2_nojit NV=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt - BENCHMARK_LOG=gpt2 NV=1 JIT=1 ASSERT_MIN_STEP_TIME=4 python3 examples/gpt2.py --prompt "Hello." 
--count 10 --temperature 0 --timing | tee gpt2_jitted.txt + BENCHMARK_LOG=gpt2_nojit NV=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing + BENCHMARK_LOG=gpt2 NV=1 JIT=1 ASSERT_MIN_STEP_TIME=4 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing - name: Run GPT2 w HALF - run: BENCHMARK_LOG=gpt2_half NV=1 HALF=1 ASSERT_MIN_STEP_TIME=6 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt + run: BENCHMARK_LOG=gpt2_half NV=1 HALF=1 ASSERT_MIN_STEP_TIME=6 python3 examples/gpt2.py --count 10 --temperature 0 --timing - name: Run GPT2 w HALF/BEAM - run: BENCHMARK_LOG=gpt2_half_beam NV=1 HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt + run: BENCHMARK_LOG=gpt2_half_beam NV=1 HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing - uses: actions/upload-artifact@v4 with: name: Speed (NVIDIA) path: | onnx_inference_speed.csv - torch_speed.txt - matmul.txt - matmul_bfloat16.txt - matmul_tf32.txt - matmul_ptx.txt - matmul_nv.txt - sd.txt - sdxl.txt - llama_unjitted.txt - llama_jitted.txt - llama_beam.txt - llama3_beam.txt - llama3_four_gpu.txt - llama3_six_gpu.txt - llama3_fp8.txt - llama_2_70B.txt - mixtral.txt - gpt2_unjitted.txt - gpt2_jitted.txt - gpt2_half.txt - gpt2_half_beam.txt - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py @@ -339,42 +293,28 @@ jobs: - name: HEVC Decode Benchmark run: VALIDATE=1 MAX_FRAMES=100 NV=1 PYTHONPATH=. python3 extra/hevc/decode.py - name: Train MNIST - run: time PYTHONPATH=. NV=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt + run: time PYTHONPATH=. 
NV=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py - name: Run 10 CIFAR training steps - run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=120 NV=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt + run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=120 NV=1 STEPS=10 python3 examples/hlb_cifar10.py - name: Run 10 CIFAR training steps w HALF - run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=110 NV=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_half.txt + run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=110 NV=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py - name: Run 10 CIFAR training steps w BF16 - run: BENCHMARK_LOG=cifar_10steps_bf16 ASSERT_MIN_STEP_TIME=120 NV=1 STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py | tee train_cifar_bf16.txt + run: BENCHMARK_LOG=cifar_10steps_bf16 ASSERT_MIN_STEP_TIME=120 NV=1 STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py # - name: Run 10 CIFAR training steps w winograd - # run: BENCHMARK_LOG=cifar_10steps_half_wino ASSERT_MIN_STEP_TIME=350 NV=1 CAPTURE_PROCESS_REPLAY=0 WINO=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt + # run: BENCHMARK_LOG=cifar_10steps_half_wino ASSERT_MIN_STEP_TIME=350 NV=1 WINO=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py - name: Run full CIFAR training w 1 GPU - run: time BENCHMARK_LOG=cifar NV=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py | tee train_cifar_one_gpu.txt + run: time BENCHMARK_LOG=cifar NV=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py - name: Run full CIFAR training steps w 6 GPUS - run: time BENCHMARK_LOG=cifar_6gpu CAPTURE_PROCESS_REPLAY=0 NV=1 DEFAULT_FLOAT=HALF STEPS=350 BS=1536 GPUS=6 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py | tee train_cifar_six_gpu.txt + run: time BENCHMARK_LOG=cifar_6gpu CAPTURE_PROCESS_REPLAY=0 NV=1 DEFAULT_FLOAT=HALF STEPS=350 BS=1536 GPUS=6 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py - name: Run MLPerf resnet eval on training data run: time BENCHMARK_LOG=resnet_eval NV=1 MODEL=resnet python3 examples/mlperf/model_eval.py - name: Run 10 MLPerf ResNet50 training steps (1 gpu) - run: BENCHMARK_LOG=resnet_10steps NV=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet_one_gpu.txt + run: BENCHMARK_LOG=resnet_10steps NV=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py - name: Run 10 MLPerf ResNet50 training steps (6 gpu) - run: BENCHMARK_LOG=resnet_10steps_6gpu NV=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=1536 GPUS=6 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet.txt + run: BENCHMARK_LOG=resnet_10steps_6gpu NV=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=1536 GPUS=6 MODEL=resnet python3 examples/mlperf/model_train.py - name: Run 10 MLPerf Bert training steps (6 gpu) # TODO: remove BERT_LAYERS once scheduler is fast - run: BENCHMARK_LOG=bert_10steps_6gpu NV=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=72 GPUS=6 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | tee train_bert.txt - - uses: actions/upload-artifact@v4 - with: - name: Speed (NVIDIA Training) - path: | - beautiful_mnist.txt - train_cifar.txt - train_cifar_half.txt - train_cifar_bf16.txt - train_cifar_wino.txt - 
train_cifar_one_gpu.txt - train_cifar_six_gpu.txt - train_resnet.txt - train_resnet_one_gpu.txt - train_bert.txt + run: BENCHMARK_LOG=bert_10steps_6gpu NV=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=72 GPUS=6 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py @@ -426,7 +366,7 @@ jobs: #- name: Test speed vs torch # run: | # python3 -c "import torch; print(torch.__version__)" - # LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt + # LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py - name: Test speed vs theoretical run: AMD=1 IGNORE_BEAM_CACHE=1 CCACHE=0 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20 - name: Test tensor cores AMD_LLVM=0 @@ -437,7 +377,7 @@ jobs: - name: Run Tensor Core GEMM (AMD) run: | AMD=1 SHOULD_USE_TC=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py - AMD=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py | tee matmul_amd.txt + AMD=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py - name: Test AMD=1 run: DEBUG=2 AMD=1 python -m pytest -rA test/test_tiny.py #- name: Test HIP=1 @@ -452,61 +392,39 @@ jobs: - name: Test AM warm start time run: time AMD=1 python3 test/test_tiny.py TestTiny.test_plus - name: Run Stable Diffusion - run: BENCHMARK_LOG=stable_diffusion ASSERT_MIN_STEP_TIME=550 AMD=1 python3 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing | tee sd.txt + run: BENCHMARK_LOG=stable_diffusion ASSERT_MIN_STEP_TIME=550 AMD=1 python3 examples/stable_diffusion.py --fp16 --seed 0 --noshow --timing - name: Run SDXL - run: BENCHMARK_LOG=stable_diffusion_xl ASSERT_MIN_STEP_TIME=3200 CAPTURE_PROCESS_REPLAY=0 AMD=1 python3 examples/sdxl.py --seed 0 --noshow --timing | tee sdxl.txt + run: BENCHMARK_LOG=stable_diffusion_xl ASSERT_MIN_STEP_TIME=3200 CAPTURE_PROCESS_REPLAY=0 AMD=1 python3 examples/sdxl.py --seed 0 --noshow --timing - name: Run LLaMA 7B run: | - BENCHMARK_LOG=llama_nojit AMD=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt - BENCHMARK_LOG=llama AMD=1 JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt + BENCHMARK_LOG=llama_nojit AMD=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing + BENCHMARK_LOG=llama AMD=1 JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run LLaMA 7B with BEAM - run: BENCHMARK_LOG=llama_beam AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_beam.txt + run: BENCHMARK_LOG=llama_beam AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing # - name: Run LLaMA 7B on 4 GPUs - # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." 
--count 10 --temperature 0 --timing | tee llama_four_gpu.txt + # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0 --timing # - name: Run LLaMA 7B on 6 GPUs - # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_six_gpu.txt + # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run LLaMA-3 8B BEAM - run: BENCHMARK_LOG=llama3_beam AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama3.py --size 8B --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_beam.txt + run: BENCHMARK_LOG=llama3_beam AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama3.py --size 8B --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 - name: Run LLaMA-3 8B on 4 GPUs with BEAM - run: BENCHMARK_LOG=llama3_beam_4gpu AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_four_gpu.txt + run: BENCHMARK_LOG=llama3_beam_4gpu AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 # - name: Run LLaMA-3 8B on 6 GPUs - # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_six_gpu.txt + # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 #- name: Restore amdgpu # run: sudo modprobe amdgpu # - name: Run LLaMA-2 70B - # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_2_70B.txt + # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing - name: Run Mixtral 8x7B - run: time BENCHMARK_LOG=mixtral AMD=1 python3 examples/mixtral.py --temperature 0 --count 10 --timing | tee mixtral.txt + run: time BENCHMARK_LOG=mixtral AMD=1 python3 examples/mixtral.py --temperature 0 --count 10 --timing - name: Run GPT2 run: | - BENCHMARK_LOG=gpt2_nojit AMD=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt - BENCHMARK_LOG=gpt2 AMD=1 JIT=1 ASSERT_MIN_STEP_TIME=5 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt + BENCHMARK_LOG=gpt2_nojit AMD=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing + BENCHMARK_LOG=gpt2 AMD=1 JIT=1 ASSERT_MIN_STEP_TIME=5 python3 examples/gpt2.py --prompt "Hello." 
--count 10 --temperature 0 --timing - name: Run GPT2 w HALF - run: BENCHMARK_LOG=gpt2_half AMD=1 HALF=1 ASSERT_MIN_STEP_TIME=5 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt + run: BENCHMARK_LOG=gpt2_half AMD=1 HALF=1 ASSERT_MIN_STEP_TIME=5 python3 examples/gpt2.py --count 10 --temperature 0 --timing - name: Run GPT2 w HALF/BEAM - run: BENCHMARK_LOG=gpt2_half_beam AMD=1 HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt - - uses: actions/upload-artifact@v4 - with: - name: Speed (AMD) - path: | - onnx_inference_speed.csv - torch_speed.txt - llama_unjitted.txt - llama_jitted.txt - llama_beam.txt - llama3_beam.txt - llama3_four_gpu.txt - llama3_six_gpu.txt - llama_2_70B.txt - gpt2_unjitted.txt - gpt2_jitted.txt - gpt2_half.txt - gpt2_half_beam.txt - matmul.txt - matmul_amd.txt - sd.txt - sdxl.txt - mixtral.txt + run: BENCHMARK_LOG=gpt2_half_beam AMD=1 HALF=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py @@ -543,31 +461,20 @@ jobs: - name: reset process replay run: test/external/process_replay/reset.py - name: Train MNIST - run: time PYTHONPATH=. AMD=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py | tee beautiful_mnist.txt + run: time PYTHONPATH=. AMD=1 TARGET_EVAL_ACC_PCT=96.0 python3 examples/beautiful_mnist.py - name: Run 10 CIFAR training steps - run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=200 AMD=1 STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt + run: BENCHMARK_LOG=cifar_10steps ASSERT_MIN_STEP_TIME=200 AMD=1 STEPS=10 python3 examples/hlb_cifar10.py - name: Run 10 CIFAR training steps w HALF - run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=200 AMD=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_half.txt + run: BENCHMARK_LOG=cifar_10steps_half ASSERT_MIN_STEP_TIME=200 AMD=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py # - name: Run 10 CIFAR training steps w BF16 - # run: BENCHMARK_LOG=cifar_10steps_bf16 ASSERT_MIN_STEP_TIME=288 AMD=1 STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py | tee train_cifar_bf16.txt + # run: BENCHMARK_LOG=cifar_10steps_bf16 ASSERT_MIN_STEP_TIME=288 AMD=1 STEPS=10 DEFAULT_FLOAT=BFLOAT16 python3 examples/hlb_cifar10.py # TODO: too slow # - name: Run 10 CIFAR training steps w winograd - # run: BENCHMARK_LOG=cifar_10steps_half_wino ASSERT_MIN_STEP_TIME=66 AMD=1 WINO=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py | tee train_cifar_wino.txt + # run: BENCHMARK_LOG=cifar_10steps_half_wino ASSERT_MIN_STEP_TIME=66 AMD=1 WINO=1 STEPS=10 DEFAULT_FLOAT=HALF python3 examples/hlb_cifar10.py - name: Run full CIFAR training w 1 GPU - run: time BENCHMARK_LOG=cifar AMD=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py | tee train_cifar_one_gpu.txt + run: time BENCHMARK_LOG=cifar AMD=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py - name: Run full CIFAR training steps w 6 GPUS - run: time BENCHMARK_LOG=cifar_6gpu AMD=1 DEFAULT_FLOAT=HALF STEPS=350 BS=1536 GPUS=6 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py | tee train_cifar_six_gpu.txt - - uses: actions/upload-artifact@v4 - with: - 
name: Speed (AMD Training) - path: | - beautiful_mnist.txt - train_cifar.txt - train_cifar_half.txt - train_cifar_bf16.txt - train_cifar_wino.txt - train_cifar_one_gpu.txt - train_cifar_six_gpu.txt + run: time BENCHMARK_LOG=cifar_6gpu AMD=1 DEFAULT_FLOAT=HALF STEPS=350 BS=1536 GPUS=6 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py @@ -606,19 +513,12 @@ jobs: - name: Run MLPerf resnet eval run: time BENCHMARK_LOG=resnet_eval AMD=1 MODEL=resnet python3 examples/mlperf/model_eval.py - name: Run 10 MLPerf ResNet50 training steps (1 gpu) - run: BENCHMARK_LOG=resnet_10steps AMD=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet_one_gpu.txt + run: BENCHMARK_LOG=resnet_10steps AMD=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py - name: Run 10 MLPerf ResNet50 training steps (6 gpu) - run: BENCHMARK_LOG=resnet_10steps_6gpu AMD=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=1536 GPUS=6 MODEL=resnet python3 examples/mlperf/model_train.py | tee train_resnet.txt + run: BENCHMARK_LOG=resnet_10steps_6gpu AMD=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=1536 GPUS=6 MODEL=resnet python3 examples/mlperf/model_train.py - name: Run 10 MLPerf Bert training steps (6 gpu) # TODO: remove BERT_LAYERS once scheduler is fast - run: BENCHMARK_LOG=bert_10steps_6gpu AMD=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=72 GPUS=6 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | tee train_bert.txt - - uses: actions/upload-artifact@v4 - with: - name: Speed (AMD MLPerf) - path: | - train_resnet.txt - train_resnet_one_gpu.txt - train_bert.txt + run: BENCHMARK_LOG=bert_10steps_6gpu AMD=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=72 GPUS=6 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py @@ -708,7 +608,7 @@ jobs: # AMD=1 AMD_LLVM=1 python3 test/test_linearizer.py test/opt/test_tensor_cores.py # AMD=1 SHOULD_USE_TC=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py - name: Run Tensor Core GEMM (AMD) - run: AMD=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py | tee am_matmul_amd.txt + run: AMD=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py - name: Test AMD=1 run: DEBUG=2 AMD=1 python -m pytest -rA test/test_tiny.py - name: Test DISK copy time @@ -718,20 +618,12 @@ jobs: AMD=1 GRAPH_ONE_KERNEL=1 PYTHONPATH=. NSZ=8192 python3 test/speed/external_test_copy_speed.py TestCopySpeed.testCopyDefaulttoCPUJit AMD=1 GRAPH_ONE_KERNEL=1 PYTHONPATH=. 
NSZ=8192 python3 test/speed/external_test_copy_speed.py TestCopySpeed.testCopyCPUtoDefaultJit - name: Run full CIFAR training w 1 GPU - run: time BENCHMARK_LOG=cifar AMD=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py | tee am_train_cifar_one_gpu.txt + run: time BENCHMARK_LOG=cifar AMD=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py # - name: Run 10 MLPerf ResNet50 training steps (1 gpu) - # run: BENCHMARK_LOG=resnet_10steps AMD=1 MNISTMOCK=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py | tee am_train_resnet_one_gpu.txt + # run: BENCHMARK_LOG=resnet_10steps AMD=1 MNISTMOCK=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py - name: Run 10 MLPerf Bert training steps (1 gpu) # TODO: remove BERT_LAYERS once scheduler is fast - run: BENCHMARK_LOG=bert_10steps AMD=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=66 GPUS=1 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | tee am_train_bert_one_gpu.txt - - uses: actions/upload-artifact@v4 - with: - name: Speed (AM Driver) - path: | - am_matmul_amd.txt - am_train_cifar_one_gpu.txt - am_train_resnet_one_gpu.txt - am_train_bert_one_gpu.txt + run: BENCHMARK_LOG=bert_10steps AMD=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=66 GPUS=1 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. python3 process_replay.py @@ -778,21 +670,13 @@ jobs: NV=1 GRAPH_ONE_KERNEL=1 PYTHONPATH=. NSZ=8192 python3 test/speed/external_test_copy_speed.py TestCopySpeed.testCopyDefaulttoCPUJit NV=1 GRAPH_ONE_KERNEL=1 PYTHONPATH=. 
NSZ=8192 python3 test/speed/external_test_copy_speed.py TestCopySpeed.testCopyCPUtoDefaultJit - name: Test LLAMA-3 - run: BENCHMARK_LOG=llama3_beam NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama3.py --size 8B --benchmark --temperature 0 | tee nv_llama3_beam.txt + run: BENCHMARK_LOG=llama3_beam NV=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama3.py --size 8B --benchmark --temperature 0 - name: Run full CIFAR training w 1 GPU - run: time BENCHMARK_LOG=cifar NV=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py | tee nv_train_cifar_one_gpu.txt + run: time BENCHMARK_LOG=cifar NV=1 DEFAULT_FLOAT=HALF STEPS=1000 TARGET_EVAL_ACC_PCT=93.0 python3 examples/hlb_cifar10.py - name: Run 10 MLPerf ResNet50 training steps (1 gpu) - run: BENCHMARK_LOG=resnet_10steps NV=1 MNISTMOCK=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py | tee nv_train_resnet_one_gpu.txt + run: BENCHMARK_LOG=resnet_10steps NV=1 MNISTMOCK=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=256 GPUS=1 MODEL=resnet python3 examples/mlperf/model_train.py - name: Run 10 MLPerf Bert training steps (1 gpu) # TODO: remove BERT_LAYERS once scheduler is fast - run: BENCHMARK_LOG=bert_10steps NV=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=66 GPUS=1 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | tee nv_train_bert_one_gpu.txt - - uses: actions/upload-artifact@v4 - with: - name: Speed (NV Driver) - path: | - nv_llama3_beam.txt - nv_train_cifar_one_gpu.txt - nv_train_resnet_one_gpu.txt - nv_train_bert_one_gpu.txt + run: BENCHMARK_LOG=bert_10steps NV=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=66 GPUS=1 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py - name: Run process replay tests run: cp test/external/process_replay/process_replay.py ./process_replay.py && git fetch origin master && git -c advice.detachedHead=false checkout origin/master && PYTHONPATH=. 
python3 process_replay.py From ed222070f7580089446871ea357f9a830410febc Mon Sep 17 00:00:00 2001 From: chenyu Date: Thu, 1 Jan 2026 11:18:29 -0500 Subject: [PATCH 24/25] update xlog2 fp16 decomp to not use fp32 (#13955) --- test/test_transcendental.py | 38 ++++++++++++++++++++++++++++++++++ tinygrad/uop/decompositions.py | 16 +++++++------- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/test/test_transcendental.py b/test/test_transcendental.py index 503286773c..92b91a819c 100644 --- a/test/test_transcendental.py +++ b/test/test_transcendental.py @@ -101,6 +101,44 @@ class TestFromFuzzer(unittest.TestCase): _test_value(0) _test_value(0.0000009) +class TestFloat16Log2(unittest.TestCase): + """Tests for native float16 log2 implementation (no float32 cast)""" + @unittest.skipUnless(is_dtype_supported(dtypes.float16, Device.DEFAULT), f"no float16 on {Device.DEFAULT}") + def test_float16_log2_basic(self): + # basic values + test_values = [1.0, 2.0, 4.0, 0.5, 0.25, 10.0, 100.0, 1000.0] + with Context(TRANSCENDENTAL=2): + for val in test_values: + result = Tensor([val], dtype=dtypes.float16).log2().numpy()[0] + expected = np.log2(np.float16(val)) + np.testing.assert_allclose(result, expected, rtol=1e-3, err_msg=f"log2({val})") + + @unittest.skipUnless(is_dtype_supported(dtypes.float16, Device.DEFAULT), f"no float16 on {Device.DEFAULT}") + @unittest.skipIf(Device.DEFAULT == "WEBGPU" and CI, "Nan handling differs on Vulkan") + def test_float16_log2_special(self): + # special values: inf, -inf, nan, 0, negative + with Context(TRANSCENDENTAL=2), np.errstate(all='ignore'): + # log2(inf) = inf + assert np.isinf(Tensor([np.inf], dtype=dtypes.float16).log2().numpy()[0]) + # log2(0) = -inf + assert Tensor([0.0], dtype=dtypes.float16).log2().numpy()[0] == -np.inf + # log2(negative) = nan + assert np.isnan(Tensor([-1.0], dtype=dtypes.float16).log2().numpy()[0]) + # log2(nan) = nan + assert np.isnan(Tensor([np.nan], dtype=dtypes.float16).log2().numpy()[0]) + + @unittest.skipUnless(is_dtype_supported(dtypes.float16, Device.DEFAULT), f"no float16 on {Device.DEFAULT}") + def test_float16_log2_denormal(self): + # test values near and below float16 min normal (6.1e-5) + # these exercise the denormal handling path with 2^10 scaling + test_values = [1e-4, 6e-5, 1e-5] + with Context(TRANSCENDENTAL=2): + for val in test_values: + result = Tensor([val], dtype=dtypes.float16).log2().numpy()[0] + expected = np.log2(np.float16(val)) + # denormals have lower precision due to float16 limitations + np.testing.assert_allclose(result, expected, rtol=5e-2, err_msg=f"log2({val})") + class TestTranscendentalSchedule(unittest.TestCase): @unittest.skipUnless(is_dtype_supported(dtypes.ulong), "Needs ulong") def test_transcendental_sin_fusion(self): diff --git a/tinygrad/uop/decompositions.py b/tinygrad/uop/decompositions.py index 82dfe67316..f37d01bd58 100644 --- a/tinygrad/uop/decompositions.py +++ b/tinygrad/uop/decompositions.py @@ -223,26 +223,26 @@ def xlog2(d:UOp) -> UOp: Paper: https://arxiv.org/pdf/2001.09258 5.5 """ assert d.dtype.scalar() in TRANSCENDENTAL_DTYPES - # TODO: float16 denormal need float32 to achieve precision - if d.dtype.scalar() == dtypes.float16: return xlog2(d.cast(dtypes.float32)).cast(dtypes.float16) - FLT_MIN = d.const_like(1e-6 if d.dtype.scalar() == dtypes.float16 else 1e-4) + # float16 uses 2^10 for denormal scaling (2^64 overflows), float32/64 use 2^64 + denormal_exp = 10 if d.dtype.scalar() == dtypes.float16 else 64 + FLT_MIN = d.const_like({dtypes.float16: 6.1e-5, dtypes.float32: 
1e-4, dtypes.float64: 1e-4}[d.dtype.scalar()]) is_denormal = d Date: Thu, 1 Jan 2026 11:37:26 -0500 Subject: [PATCH 25/25] update tqdm for edge case (#13956) 1.00kit/s and not 1000it/s for value 999.5 --- test/unit/test_tqdm.py | 20 ++++++++++++++++++++ tinygrad/helpers.py | 4 +++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/test/unit/test_tqdm.py b/test/unit/test_tqdm.py index 2ba3f2fe4a..bf89d49e6e 100644 --- a/test/unit/test_tqdm.py +++ b/test/unit/test_tqdm.py @@ -128,6 +128,26 @@ class TestProgressBar(unittest.TestCase): self._compare_bars(tinytqdm_output, tqdm_output) if n > 5: break + @patch('sys.stderr', new_callable=StringIO) + @patch('shutil.get_terminal_size') + def test_si_boundary(self, mock_terminal_size, mock_stderr): + """Test SI formatting at boundaries (e.g., 999.5 -> 1.00k, not 1000)""" + ncols = 80 + mock_terminal_size.return_value = namedtuple(field_names='columns', typename='terminal_size')(ncols) + + # Test rates at the boundary: 999 stays as "999", 999.5+ becomes "1.00k" + for rate in [999, 999.4, 999.5, 1000, 1001]: + mock_stderr.truncate(0) + mock_stderr.seek(0) + elapsed = 1.0 / rate + # Need 3 perf_counter calls: init st, init update, final update + with patch('time.perf_counter', side_effect=[0, 0, elapsed]): + bar = tinytqdm(desc="Test", total=1, unit_scale=True, rate=10**9) + bar.update(1, close=True) + tinytqdm_output = mock_stderr.getvalue().split("\r")[-1].rstrip() + tqdm_output = tqdm.format_meter(n=1, total=1, elapsed=elapsed, ncols=ncols, prefix="Test", unit_scale=True) + self._compare_bars(tinytqdm_output, tqdm_output) + @unittest.skip("this is flaky") @patch('sys.stderr', new_callable=StringIO) @patch('shutil.get_terminal_size') diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py index e3abbde9e4..a730237764 100644 --- a/tinygrad/helpers.py +++ b/tinygrad/helpers.py @@ -508,7 +508,9 @@ class tqdm(Generic[T]): if elapsed and self.i/elapsed > self.rate and self.i: self.skip = max(int(self.i/elapsed)//self.rate,1) def HMS(t): return ':'.join(f'{x:02d}' if i else str(x) for i,x in enumerate([int(t)//3600,int(t)%3600//60,int(t)%60]) if i or x) def SI(x): - return (f"{x/1000**int(g:=round(math.log(x,1000),6)):.{int(3-3*math.fmod(g,1))}f}"[:4].rstrip('.')+' kMGTPEZY'[int(g)].strip()) if x else '0.00' + if not x: return '0.00' + v = f"{x/1000**int(g:=round(math.log(x,1000),6)):.{int(3-3*math.fmod(g,1))}f}"[:4].rstrip('.') + return (f"{x/1000**(int(g)+1):.3f}"[:4].rstrip('.')+' kMGTPEZY'[int(g)+1]) if v == "1000" else v+' kMGTPEZY'[int(g)].strip() prog_text = f'{SI(self.n)}{f"/{SI(self.t)}" if self.t else self.unit}' if self.unit_scale else f'{self.n}{f"/{self.t}" if self.t else self.unit}' est_text = f'<{HMS(elapsed/prog-elapsed) if self.n else "?"}' if self.t else '' it_text = (SI(self.n/elapsed) if self.unit_scale else f"{self.n/elapsed:5.2f}") if self.n else "?"
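
Aside (not part of the patches above): a minimal standalone sketch of the SI formatting helper as patched in tinygrad/helpers.py by the tqdm change, lifted out of the tqdm class into a plain function purely for illustration. It reproduces the boundary behavior the commit message and the new test_si_boundary test describe (999.5 it/s should render as "1.00k", not "1000"); the printed strings are illustrative, not authoritative.

import math

def SI(x):
  # copied from the patched tinygrad/helpers.py above: format a count/rate with an SI suffix
  if not x: return '0.00'
  v = f"{x/1000**int(g:=round(math.log(x,1000),6)):.{int(3-3*math.fmod(g,1))}f}"[:4].rstrip('.')
  # when the mantissa rounds up to "1000", bump to the next SI prefix instead (999.5 -> "1.00k")
  return (f"{x/1000**(int(g)+1):.3f}"[:4].rstrip('.')+' kMGTPEZY'[int(g)+1]) if v == "1000" else v+' kMGTPEZY'[int(g)].strip()

for rate in (999, 999.4, 999.5, 1000, 1001):
  print(rate, SI(rate))  # 999 and 999.4 stay "999"; 999.5 and above print "1.00k"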