remu: add new instructions introduced in RANGEIFY (#12363)

* add v_mad_i64_i32 for test_output_padded_conv_transpose2d

* run amd test_ops

* skip test_masked_select
This commit is contained in:
qazal
2025-09-30 12:36:29 +03:00
committed by GitHub
parent 360980f1a3
commit e8c595c29e
2 changed files with 12 additions and 1 deletion

View File

@@ -930,7 +930,7 @@ impl<'a> Thread<'a> {
let op = ((instr >> 16) & 0x3ff) as u32;
match op {
764 | 765 | 288 | 289 | 290 | 766 | 768 | 769 => {
764 | 765 | 288 | 289 | 290 | 766 | 767 | 768 | 769 => {
let vdst = (instr & 0xff) as usize;
let sdst = ((instr >> 8) & 0x7f) as usize;
let f = |i: u32| -> usize { ((instr >> i) & 0x1ff) as usize };
@@ -944,6 +944,16 @@ impl<'a> Thread<'a> {
assert_eq!(clmp, 0);
let vcc = match op {
767 => {
    // v_mad_i64_i32: signed 32-bit x signed 32-bit multiply, plus a signed 64-bit addend.
    // The arm's value is the signed-overflow flag of the whole operation.
    let (s0, s1, s2): (u32, u32, u64) = (self.val(s0), self.val(s1), self.val(s2));
    // The operands arrive as raw 32-bit patterns. They must be reinterpreted as i32
    // *before* widening so that negative inputs are sign-extended; a direct
    // `u32 as i64` cast zero-extends and turns e.g. -1 into 4294967295.
    // The product of two sign-extended i32 values has magnitude <= 2^62, so the
    // multiplication itself can never overflow an i64.
    let product = i64::from(s0 as i32) * i64::from(s1 as i32);
    // Only the 64-bit accumulate step can overflow.
    let (ret, overflowed) = product.overflowing_add(s2 as i64);
    // The destination register pair is only written when `self.exec.read()` is true.
    if self.exec.read() {
        self.vec_reg.write64(vdst, ret as u64);
    }
    overflowed
},
766 => {
let (s0, s1, s2): (u32, u32, u64) = (self.val(s0), self.val(s1), self.val(s2));
let (mul_result, overflow_mul) = (s0 as u64).overflowing_mul(s1 as u64);

View File

@@ -3164,6 +3164,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(32,10)], lambda x: x.masked_fill((x>0.1).detach(), -math.inf))
helper_test_op([(32,10)], lambda x: x.masked_fill((x<0.1).detach(), -math.inf))
@unittest.skipIf(getenv("MOCKGPU") and Device.DEFAULT == "AMD" and RANGEIFY, "very slow on MOCKGPU because reduce does not fold")
def test_masked_select(self):
helper_test_op([(32, 10)], lambda x: x.masked_select(x>0.5), lambda x: x.masked_select(x>0.5), forward_only=True)
helper_test_op([(32, 10)], lambda x: x.masked_select(torch.tensor(True)), lambda x: x.masked_select(Tensor(True)), forward_only=True)