diff --git a/extra/assembly/rdna3/asm.py b/extra/assembly/rdna3/asm.py index 01351914e4..f2c0e92090 100644 --- a/extra/assembly/rdna3/asm.py +++ b/extra/assembly/rdna3/asm.py @@ -259,8 +259,10 @@ def disasm(inst: Inst) -> str: else: is_16bit_op = any(x in op_name for x in _16BIT_TYPES) and not any(x in op_name for x in ('dot2', 'pk_', 'sad', 'msad', 'qsad', 'mqsad')) is_f16_dst = is_f16_src = is_f16_src2 = is_16bit_op + # Check if any opsel bit is set (any operand uses .h) - if so, we need explicit .l for low-half + any_hi = opsel != 0 def fmt_vop3_src(v, neg_bit, abs_bit, hi_bit=False, reg_cnt=1, is_16=False): - s = _fmt_src_n(v, reg_cnt) if reg_cnt > 1 else f"v{v - 256}.h" if is_16 and v >= 256 and hi_bit else f"v{v - 256}.l" if is_16 and v >= 256 else fmt_src(v) + s = _fmt_src_n(v, reg_cnt) if reg_cnt > 1 else f"v{v - 256}.h" if is_16 and v >= 256 and hi_bit else f"v{v - 256}.l" if is_16 and v >= 256 and any_hi else fmt_src(v) if abs_bit: s = f"|{s}|" return f"-{s}" if neg_bit else s # Determine register count for each source (check for cvt-specific 64-bit flags first) @@ -280,7 +282,7 @@ def disasm(inst: Inst) -> str: elif dst_cnt > 1: dst_str = _vreg(vdst, dst_cnt) elif is_f16_dst: - dst_str = f"v{vdst}.h" if (opsel & 8) else f"v{vdst}.l" + dst_str = f"v{vdst}.h" if (opsel & 8) else f"v{vdst}.l" if any_hi else f"v{vdst}" else: dst_str = f"v{vdst}" clamp_str = " clamp" if clmp else "" diff --git a/extra/assembly/rdna3/lib.py b/extra/assembly/rdna3/lib.py index f93149eed0..7f2db1e014 100644 --- a/extra/assembly/rdna3/lib.py +++ b/extra/assembly/rdna3/lib.py @@ -40,8 +40,9 @@ bits = _Bits() # Register types class Reg: - def __init__(self, idx: int, count: int = 1, hi: bool = False): self.idx, self.count, self.hi = idx, count, hi + def __init__(self, idx: int, count: int = 1, hi: bool = False, neg: bool = False): self.idx, self.count, self.hi, self.neg = idx, count, hi, neg def __repr__(self): return f"{self.__class__.__name__.lower()[0]}[{self.idx}]" if self.count == 1 else f"{self.__class__.__name__.lower()[0]}[{self.idx}:{self.idx + self.count}]" + def __neg__(self): return self.__class__(self.idx, self.count, self.hi, neg=not self.neg) T = TypeVar('T', bound=Reg) class _RegFactory(Generic[T]): @@ -162,6 +163,11 @@ class Inst: if name in SRC_FIELDS: encoded = encode_src(val) self._values[name] = RawImm(encoded) + # Handle negation modifier for VOP3 instructions + if isinstance(val, Reg) and val.neg and 'neg' in self._fields: + neg_bit = {'src0': 1, 'src1': 2, 'src2': 4}.get(name, 0) + cur_neg = self._values.get('neg', 0) + self._values['neg'] = (cur_neg.val if isinstance(cur_neg, RawImm) else cur_neg) | neg_bit # Track literal value if needed (encoded as 255) if encoded == 255 and self._literal is None and isinstance(val, int) and not isinstance(val, IntEnum): self._literal = val diff --git a/extra/remu/test/hwtest.py b/extra/remu/test/hwtest.py index 76e64d5dd3..0d769099e9 100644 --- a/extra/remu/test/hwtest.py +++ b/extra/remu/test/hwtest.py @@ -48,14 +48,12 @@ class TestHW(unittest.TestCase): ]) self.assertEqual(out, [2]) - # assembler err - @unittest.expectedFailure def test_simple_s_mov(self): out = get_output([ s_mov_b32(s[7], 0x7fffffff), v_mov_b32_e32(v[1], s[7]), ]) - self.assertEqual(out, [2]) + self.assertEqual(out, [0x7fffffff]) def test_exec_mov(self): out = get_output([ @@ -102,8 +100,6 @@ class TestHW(unittest.TestCase): self.assertEqual(run_fmac(a, -b), f16_to_bits(-10.0)) self.assertEqual(run_fmac(-a, -b), f16_to_bits(14.0)) - # assembler err - @unittest.expectedFailure def test_s_abs_i32(self): def check(x, y, dst=s[10], scc=0): for reg,val in [(dst, y), (SCC, scc)]: @@ -121,8 +117,6 @@ class TestHW(unittest.TestCase): check(0xffffffff, 0x00000001, scc=1) check(0, 0, scc=0) - # how do I negate a VGPR operand? - @unittest.expectedFailure def test_v_rcp_f32_neg_vop3(self): def v_neg_rcp_f32(x:float, y:float): out = get_output([ @@ -138,14 +132,12 @@ class TestHW(unittest.TestCase): v_neg_rcp_f32(-2.0, 0.5) v_neg_rcp_f32(2.0, -0.5) - # how do I negate a VGPR operand? - @unittest.expectedFailure def test_v_cndmask_b32_neg(self): def v_neg(x:float, y:float): out = get_output([ v_mov_b32_e32(v[1], f32_to_bits(x)), s_mov_b32(s[10], 1), - v_cndmask_b32_e32(v[1], v[1], -v[1], s[10]), + v_cndmask_b32_e64(v[1], v[1], -v[1], s[10]), ])[0] assert out == f32_to_bits(y), f"{f32_from_bits(out)} != {y} / {out} != {f32_to_bits(y)}"