remove old dist multigpu (#3811)

chenyu
2024-03-18 18:31:05 -04:00
committed by GitHub
parent 5dd048a378
commit 20681d5c4a
6 changed files with 0 additions and 348 deletions

@@ -1,61 +0,0 @@
from extra import dist
from tinygrad.features.jit import TinyJit
if __name__ == "__main__":
  dist.preinit()

from extra.dist import collectives
from tinygrad.helpers import CI, getenv
from tinygrad.tensor import Tensor
import numpy as np

@TinyJit
def allreduce_jit(t:Tensor) -> Tensor:
  return collectives.allreduce(t).realize()

SIZE = 2048 if not CI else 2
SIZE_2 = 255 if not CI else 3

def run():
  # set a deterministic seed so that both ranks generate the same random tensor
  Tensor.manual_seed(42)

  rank = getenv("RANK")

  # loop 3 times to make sure it works with the jit
  for _ in range(3):
    # create a tensor to send
    t = Tensor.zeros(SIZE, SIZE) if rank != 0 else Tensor.ones(SIZE, SIZE)
    t2 = allreduce_jit(t.contiguous().realize())
    assert np.allclose(np.ones((SIZE, SIZE)), t2.numpy()), f"{t2.numpy()} wasn't ones"

  # reset jit
  allreduce_jit.cnt = 0

  # test uneven chunk sizes
  for _ in range(3):
    # create a tensor to send
    t = Tensor.ones(SIZE_2, SIZE_2, SIZE_2) if rank == 0 else Tensor.zeros(SIZE_2, SIZE_2, SIZE_2)
    t2 = allreduce_jit(t.contiguous().realize())
    assert np.allclose(np.ones((SIZE_2, SIZE_2, SIZE_2)), t2.numpy()), f"{t2.numpy()} wasn't ones"

  print(f"rank {rank} passed")

if __name__ == "__main__":
  if getenv("HIP"):
    from tinygrad.runtime.ops_hip import HIP
    devices = [f"hip:{i}" for i in range(HIP.device_count)]
  else:
    from tinygrad.runtime.ops_gpu import CL
    devices = [f"gpu:{i}" for i in range(len(CL.devices))] if not CI else ["gpu:0", "gpu:0"]
  world_size = len(devices)

  dist.init_oob(world_size)

  processes = []
  for rank, device in enumerate(devices):
    processes.append(dist.spawn(rank, device, fn=run, args=()))
  for p in processes: p.join()

  # exit with error code if any of the processes failed
  for p in processes:
    if p.exitcode != 0: exit(p.exitcode)
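What this removed test checks: allreduce sums each rank's tensor elementwise and leaves every rank holding the same result, so with ones on one rank and zeros everywhere else the reduction is all ones. A minimal single-process sketch of that semantics in plain NumPy (reference_allreduce is an illustrative stand-in, not the removed collectives.allreduce API):

import numpy as np

def reference_allreduce(tensors):
  # elementwise sum over the rank dimension; every rank receives the same copy
  total = np.sum(tensors, axis=0)
  return [total.copy() for _ in tensors]

# one rank holds ones, the rest hold zeros: the sum is ones everywhere
world_size = 4
inputs = [np.ones((2, 2)) if rank == 0 else np.zeros((2, 2)) for rank in range(world_size)]
outputs = reference_allreduce(inputs)
assert all(np.allclose(out, np.ones((2, 2))) for out in outputs)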

@@ -1,68 +0,0 @@
from extra import dist
from tinygrad.features.jit import TinyJit
if __name__ == "__main__":
  dist.preinit()

from extra.dist import world
from tinygrad.helpers import CI, getenv
from tinygrad.tensor import Tensor
import numpy as np

@TinyJit
def send_jit(t, target_rank) -> Tensor:
  return world.send(t, target_rank).realize()

@TinyJit
def recv_jit(t, target_rank) -> Tensor:
  return world.recv(t, target_rank).realize()

SIZE = 2048 if not CI else 2

def run():
  # set a deterministic seed so that both ranks generate the same random tensor
  Tensor.manual_seed(42)

  rank = getenv("RANK")

  # loop 3 times to make sure it works with the jit
  for _ in range(3):
    # create a tensor to send
    t = Tensor.randn(SIZE, SIZE)

    # send to rank 1
    if rank == 0:
      send_jit(t, 1)
    elif rank == 1:
      t2 = Tensor.empty(SIZE, SIZE)
      recv_jit(t2, 0)

    # recv from rank 1
    if rank == 0:
      t2 = Tensor.empty(SIZE, SIZE)
      recv_jit(t2, 1)
    elif rank == 1:
      send_jit(t2, 0)

    # check that the received tensor is the same as the sent tensor
    if rank == 0:
      assert np.allclose(t.numpy(), t2.numpy()), f"{t2.numpy()} wasn't equal to {t.numpy()}"

  print(f"rank {rank} passed")

if __name__ == "__main__":
  if getenv("HIP"):
    devices = ["hip:0", "hip:1"]
  else:
    devices = ["gpu:0", "gpu:1" if not CI else "gpu:0"]
  world_size = len(devices)

  dist.init_oob(world_size)

  processes = []
  for rank, device in enumerate(devices):
    processes.append(dist.spawn(rank, device, fn=run, args=()))
  for p in processes: p.join()

  # exit with error code if any of the processes failed
  for p in processes:
    if p.exitcode != 0: exit(p.exitcode)
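What this removed test checks: a point-to-point round trip. Rank 0 sends a random tensor to rank 1, rank 1 receives it and sends it straight back, and rank 0 verifies the echo matches the original. A minimal sketch of that ping-pong pattern, using multiprocessing.Pipe as a hypothetical stand-in for the removed world.send/world.recv API:

import numpy as np
from multiprocessing import Pipe, Process

def rank0(conn):
  t = np.random.randn(4, 4)
  conn.send(t)      # send to rank 1
  t2 = conn.recv()  # receive the echo back
  assert np.allclose(t, t2), "round trip changed the tensor"
  print("rank 0 passed")

def rank1(conn):
  # receive from rank 0 and send it straight back
  conn.send(conn.recv())

if __name__ == "__main__":
  a, b = Pipe()
  p = Process(target=rank1, args=(b,))
  p.start()
  rank0(a)
  p.join()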