more work on kfd (#4079)

* more work on kfd * fix multitensor test on kfd * stuff
2026-04-29 03:00:14 -04:00 · 2024-04-05 08:36:36 -07:00
parent e7ff5102cf
commit a337922c44
3 changed files with 42 additions and 11 deletions
--- a/test/external/fuzz_kfd.py
+++ b/test/external/fuzz_kfd.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import random
+from tqdm import trange
+from typing import List
+from tinygrad import Device
+from tinygrad.runtime.ops_kfd import KFDDevice, HWCopyQueue, HWComputeQueue
+
+if __name__ == "__main__":
+  dev: List[KFDDevice] = [Device[f"KFD:{i}"] for i in range(6)]
+  print(f"got {len(dev)} devices")
+
+  buffers = [(rd:=random.choice(dev), rd.allocator.alloc(random.randint(1, 10000))) for i in range(100)]
+
+  for _ in trange(100000):
+    d1, b1 = random.choice(buffers)
+    d2, b2 = random.choice(buffers)
+    d1._gpu_map(b2)
+    q = HWComputeQueue()
+    q.signal(sig:=KFDDevice._get_signal(10))
+    qc = HWCopyQueue()
+    qc.wait(sig)
+    qc.copy(b1.va_addr, b2.va_addr, min(b1.size, b2.size))
+    d1.completion_signal.value = 1
+    qc.signal(d1.completion_signal)
+    qc.submit(d1)
+    q.wait(d1.completion_signal)
+    q.submit(d1)
+    KFDDevice._wait_on(d1.completion_signal.event_id)
--- a/test/test_multitensor.py
+++ b/test/test_multitensor.py
@@ -115,9 +115,8 @@ class TestMultiTensor(unittest.TestCase):
    fn = f(n)
    np.testing.assert_allclose(fX.numpy(), fn, rtol=1e-6, atol=1e-6)

-  @unittest.skipIf(CI and Device.DEFAULT == "CLANG", "clang is slow")
+  @unittest.skip("slow")
  def test_fuzz_allreduce(self):
-
    random.seed(41)
    for it in range(100):
      for n in range(2, 4+1):
@@ -132,7 +131,6 @@ class TestMultiTensor(unittest.TestCase):
        assert mean_err < 1e-6, f"big mean error, iteration {it}_{n}"
        assert max_err < 1e-6, f"big max error, iteration {it}_{n}"

-
  def _test_matmul_shard_axis(self, shard_x, shard_w, device):
    X = Tensor.kaiming_uniform(N, N).realize()
    W = Tensor.kaiming_uniform(N, N).realize()