mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
add v_fmac_f16 vop3 instruction to remu (#10247)
* fmac vop3
* from the box
This commit is contained in:
@@ -1127,13 +1127,14 @@ impl<'a> Thread<'a> {
|
||||
self.vec_reg.write64(vdst, ret)
|
||||
}
|
||||
}
|
||||
306 | 309 | 313 | 596 | 584 | 585 | 588 => {
|
||||
306 | 309 | 310 | 313 | 596 | 584 | 585 | 588 => {
|
||||
let (s0, s1, s2) = (self.val(src.0), self.val(src.1), self.val(src.2));
|
||||
let s0 = f16::from_bits(s0).negate(0, neg).absolute(0, abs);
|
||||
let s1 = f16::from_bits(s1).negate(1, neg).absolute(1, abs);
|
||||
let s2 = f16::from_bits(s2).negate(1, neg).absolute(1, abs);
|
||||
let ret = match op {
|
||||
309 => s0 * s1,
|
||||
310 => f16::mul_add(s0, s1, f16::from_bits(self.vec_reg[vdst] as u16)),
|
||||
306 => s0 + s1,
|
||||
584 => f16::mul_add(s0, s1, s2),
|
||||
585 => f16::min(f16::min(s0, s1), s2),
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import numpy as np
|
||||
import unittest
|
||||
import subprocess
|
||||
import subprocess, struct
|
||||
from typing import cast
|
||||
from tinygrad.runtime.ops_amd import AMDProgram, AMDDevice
|
||||
from tinygrad import Tensor, dtypes, Device
|
||||
@@ -83,7 +83,7 @@ amdhsa.version:
|
||||
+ "\n" + code_start + code + f"\n.size {function_name}, .-{function_name}"
|
||||
return AMDProgram(cast(AMDDevice, Device["AMD"]), function_name, assemble(ret))
|
||||
|
||||
def get_output(s:str, n_threads:int):
|
||||
def get_output(s:str, n_threads:int=1):
|
||||
assert n_threads <= 32
|
||||
code = "\n".join(["s_load_b64 s[0:1], s[0:1], null", "v_lshlrev_b32_e32 v0, 2, v0", s,
|
||||
"s_waitcnt 0",
|
||||
@@ -94,6 +94,8 @@ def get_output(s:str, n_threads:int):
|
||||
prg(test._buf, global_size=(1, 1, 1), local_size=(n_threads, 1, 1), wait=True)
|
||||
return test.numpy()
|
||||
|
||||
def f16_to_bits(x:float) -> int:
  """Return the integer whose low 16 bits are the IEEE 754 half-precision encoding of x."""
  # serialize x as a little-endian binary16 value, then reinterpret those two bytes as an unsigned integer
  half_bytes = struct.pack('<e', x)
  return int.from_bytes(half_bytes, 'little')
|
||||
|
||||
@unittest.skipUnless(Device.DEFAULT == "AMD", "tests RDNA3")
|
||||
class TestHW(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@@ -140,5 +142,15 @@ class TestHW(unittest.TestCase):
|
||||
""", n_threads=2)
|
||||
np.testing.assert_equal(out, 0b01)
|
||||
|
||||
def test_fmac_vop3_modifier(self):
|
||||
# Checks v_fmac_f16 in its VOP3 (_e64) encoding, with and without negate modifiers on the sources.
# Setup shared by every case: v10 = 4.0, v11 = 3.0 and the destination/accumulator v1 = 2.0,
# each moved in as the raw f16 bit pattern produced by f16_to_bits.
init_state = f"""
|
||||
v_mov_b32_e32 v10 {f16_to_bits(4.0)}
|
||||
v_mov_b32_e32 v11 {f16_to_bits(3.0)}
|
||||
v_mov_b32_e32 v1 {f16_to_bits(2.0)}
|
||||
"""
|
||||
# The expected values correspond to src0 * src1 + (previous v1), compared as f16 bit patterns:
#   3 * 4 + 2 = 14
self.assertEqual(get_output(init_state+"\n"+"v_fmac_f16_e64 v1 v11 v10"), f16_to_bits(14.))
|
||||
#   (-3) * 4 + 2 = -10  (negate modifier on the first source only)
self.assertEqual(get_output(init_state+"\n"+"v_fmac_f16_e64 v1 -v11 v10"), f16_to_bits(-10.))
|
||||
#   (-3) * (-4) + 2 = 14  (negate modifier on both sources)
self.assertEqual(get_output(init_state+"\n"+"v_fmac_f16_e64 v1 -v11 -v10"), f16_to_bits(14.))
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user