mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
add v_rcp_f32_e64 to remu (#10393)
* tests from the box
* add v_rcp_f32_e64 to remu
* f32::from_bits utils
* v_cndmask_b32 tests
This commit is contained in:
@@ -55,10 +55,7 @@ where
|
|||||||
{
|
{
|
||||||
fn negate(&self, pos: usize, modifier: usize) -> T {
|
fn negate(&self, pos: usize, modifier: usize) -> T {
|
||||||
match (modifier >> pos) & 1 {
|
match (modifier >> pos) & 1 {
|
||||||
1 => match self.is_zero() {
|
1 => -*self,
|
||||||
true => T::zero(),
|
|
||||||
false => -*self,
|
|
||||||
},
|
|
||||||
_ => *self,
|
_ => *self,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -122,7 +119,7 @@ mod tests {
|
|||||||
assert_eq!(0.3_f32.negate(2, 0b100), -0.3_f32);
|
assert_eq!(0.3_f32.negate(2, 0b100), -0.3_f32);
|
||||||
assert_eq!(0.3_f32.negate(0, 0b110), 0.3_f32);
|
assert_eq!(0.3_f32.negate(0, 0b110), 0.3_f32);
|
||||||
assert_eq!(0.3_f32.negate(1, 0b010), -0.3_f32);
|
assert_eq!(0.3_f32.negate(1, 0b010), -0.3_f32);
|
||||||
assert_eq!(0.0_f32.negate(0, 0b001).to_bits(), 0);
|
assert_eq!(0.0_f32.negate(0, 0b001).to_bits(), (-0.0f32).to_bits());
|
||||||
assert_eq!((-0.0_f32).negate(0, 0b001).to_bits(), 0);
|
assert_eq!((-0.0_f32).negate(0, 0b001).to_bits(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1246,7 +1246,7 @@ impl<'a> Thread<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let ret = match op {
|
let ret = match op {
|
||||||
257 | 259 | 299 | 260 | 261 | 264 | 272 | 392 | 531 | 537 | 540 | 551 | 567 | 796 => {
|
257 | 259 | 299 | 260 | 261 | 264 | 272 | 392 | 426 | 531 | 537 | 540 | 551 | 567 | 796 => {
|
||||||
let s0 = f32::from_bits(s0).negate(0, neg).absolute(0, abs);
|
let s0 = f32::from_bits(s0).negate(0, neg).absolute(0, abs);
|
||||||
let s1 = f32::from_bits(s1).negate(1, neg).absolute(1, abs);
|
let s1 = f32::from_bits(s1).negate(1, neg).absolute(1, abs);
|
||||||
let s2 = f32::from_bits(s2).negate(2, neg).absolute(2, abs);
|
let s2 = f32::from_bits(s2).negate(2, neg).absolute(2, abs);
|
||||||
@@ -1257,6 +1257,7 @@ impl<'a> Thread<'a> {
|
|||||||
264 => s0 * s1,
|
264 => s0 * s1,
|
||||||
272 => f32::max(s0, s1),
|
272 => f32::max(s0, s1),
|
||||||
299 => f32::mul_add(s0, s1, f32::from_bits(self.vec_reg[vdst])),
|
299 => f32::mul_add(s0, s1, f32::from_bits(self.vec_reg[vdst])),
|
||||||
|
426 => s0.recip(),
|
||||||
531 => f32::mul_add(s0, s1, s2),
|
531 => f32::mul_add(s0, s1, s2),
|
||||||
537 => f32::min(f32::min(s0, s1), s2),
|
537 => f32::min(f32::min(s0, s1), s2),
|
||||||
540 => f32::max(f32::max(s0, s1), s2),
|
540 => f32::max(f32::max(s0, s1), s2),
|
||||||
@@ -3032,7 +3033,7 @@ mod test_vop3 {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_v_cndmask_b32_e64_neg() {
|
fn test_v_cndmask_b32_e64_neg() {
|
||||||
[[0.0f32, 0.0], [-0.0f32, 0.0], [1.0f32, -1.0], [-1.0f32, 1.0]].iter().for_each(|[input, ret]| {
|
[[0.0f32, -0.0], [-0.0f32, 0.0], [1.0f32, -1.0], [-1.0f32, 1.0]].iter().for_each(|[input, ret]| {
|
||||||
let mut thread = _helper_test_thread();
|
let mut thread = _helper_test_thread();
|
||||||
thread.scalar_reg[0] = false as u32;
|
thread.scalar_reg[0] = false as u32;
|
||||||
thread.vec_reg[3] = input.to_bits();
|
thread.vec_reg[3] = input.to_bits();
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import unittest
|
import unittest
|
||||||
import subprocess, struct
|
import subprocess, struct, math
|
||||||
from typing import cast
|
from typing import cast
|
||||||
from tinygrad.runtime.ops_amd import AMDProgram, AMDDevice
|
from tinygrad.runtime.ops_amd import AMDProgram, AMDDevice
|
||||||
from tinygrad import Tensor, dtypes, Device
|
from tinygrad import Tensor, dtypes, Device
|
||||||
@@ -95,6 +95,8 @@ def get_output(s:str, n_threads:int=1):
|
|||||||
return test.numpy()
|
return test.numpy()
|
||||||
|
|
||||||
def f16_to_bits(x:float) -> int: return struct.unpack('<H', struct.pack('<e', x))[0]
|
def f16_to_bits(x:float) -> int: return struct.unpack('<H', struct.pack('<e', x))[0]
|
||||||
|
def f32_from_bits(x:int) -> float: return struct.unpack('<f', struct.pack('<I', x))[0]
|
||||||
|
def f32_to_bits(x:float) -> int: return struct.unpack('<I', struct.pack('<f', x))[0]
|
||||||
|
|
||||||
@unittest.skipUnless(Device.DEFAULT == "AMD", "tests RDNA3")
|
@unittest.skipUnless(Device.DEFAULT == "AMD", "tests RDNA3")
|
||||||
class TestHW(unittest.TestCase):
|
class TestHW(unittest.TestCase):
|
||||||
@@ -168,5 +170,33 @@ class TestHW(unittest.TestCase):
|
|||||||
s_abs_i32(0xffffffff, 0x00000001, scc=1)
|
s_abs_i32(0xffffffff, 0x00000001, scc=1)
|
||||||
s_abs_i32(0, 0, scc=0)
|
s_abs_i32(0, 0, scc=0)
|
||||||
|
|
||||||
|
def test_v_rcp_f32_neg_vop3(self):
|
||||||
|
def v_neg_rcp_f32(x:float, y:float):
|
||||||
|
out = get_output(f"""
|
||||||
|
v_mov_b32_e32 v1 {f32_to_bits(x)}
|
||||||
|
v_rcp_f32_e64 v1, -v1
|
||||||
|
""")[0]
|
||||||
|
assert out == f32_to_bits(y), f"{f32_from_bits(out)} != {y} / {out} != {f32_to_bits(y)}"
|
||||||
|
v_neg_rcp_f32(math.inf, -0.0)
|
||||||
|
v_neg_rcp_f32(-math.inf, 0.0)
|
||||||
|
v_neg_rcp_f32(0.0, -math.inf)
|
||||||
|
v_neg_rcp_f32(-0.0, math.inf)
|
||||||
|
v_neg_rcp_f32(-2.0, 0.5)
|
||||||
|
v_neg_rcp_f32(2.0, -0.5)
|
||||||
|
|
||||||
|
def test_v_cndmask_b32_neg(self):
|
||||||
|
def v_neg(x:int|float, y:float):
|
||||||
|
out = get_output(f"""
|
||||||
|
v_mov_b32_e32 v1 {f32_to_bits(x)}
|
||||||
|
s_mov_b32_e32 s10 1 // always pick -v1
|
||||||
|
v_cndmask_b32 v1, v1, -v1 s10
|
||||||
|
""")[0]
|
||||||
|
assert out == f32_to_bits(y), f"{f32_from_bits(out)} != {y} / {out} != {f32_to_bits(y)}"
|
||||||
|
v_neg(-0.0, 0.0)
|
||||||
|
v_neg(0.0, -0.0)
|
||||||
|
v_neg(2.0, -2.0)
|
||||||
|
v_neg(math.inf, -math.inf)
|
||||||
|
v_neg(-math.inf, math.inf)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user