Files
tinygrad/test/test_copy_speed.py
George Hotz 4f8f0ac139 minor cleanups, remove dead files (#2398)
* minor cleanups, remove dead files

* s.name

* use disk

* pytest passes on mac
2023-11-23 09:01:50 -08:00

66 lines
2.4 KiB
Python

import unittest
from tinygrad import Tensor
from tinygrad.ops import Device
from tinygrad.helpers import Timing, CI
import multiprocessing.shared_memory as shared_memory
N = 4096 if CI else 16384
class TestCopySpeed(unittest.TestCase):
@classmethod
def setUpClass(cls): Device[Device.DEFAULT].synchronize()
def testCopySHMtoDefault(self):
s = shared_memory.SharedMemory(name="test_X", create=True, size=N*N*4)
s.close()
if CI:
t = Tensor.empty(N, N, device="disk:/dev/shm/test_X").realize()
else:
t = Tensor.empty(N, N, device="disk:shm:test_X").realize()
for _ in range(3):
with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
with Timing("queue: "):
t.to(Device.DEFAULT).realize()
Device[Device.DEFAULT].synchronize()
s.unlink()
def testCopyCPUtoDefault(self):
t = Tensor.rand(N, N, device="cpu").realize()
print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
for _ in range(3):
with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
with Timing("queue: "):
t.to(Device.DEFAULT).realize()
Device[Device.DEFAULT].synchronize()
def testCopyCPUtoDefaultFresh(self):
print("fresh copy")
for _ in range(3):
t = Tensor.rand(N, N, device="cpu").realize()
with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
with Timing("queue: "):
t.to(Device.DEFAULT).realize()
Device[Device.DEFAULT].synchronize()
del t
def testCopyDefaulttoCPU(self):
t = Tensor.rand(N, N).realize()
print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
for _ in range(3):
with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s"):
t.to('cpu').realize()
@unittest.skipIf(CI, "CI doesn't have 6 GPUs")
def testCopyCPUto6GPUs(self):
from tinygrad.runtime.ops_gpu import CL
if len(CL.devices) != 6: raise unittest.SkipTest("computer doesn't have 6 GPUs")
t = Tensor.rand(N, N, device="cpu").realize()
print(f"buffer: {t.nbytes()*1e-9:.2f} GB")
for _ in range(3):
with Timing("sync: ", on_exit=lambda ns: f" @ {t.nbytes()/ns:.2f} GB/s ({t.nbytes()*6/ns:.2f} GB/s total)"):
with Timing("queue: "):
for g in range(6):
t.to(f"gpu:{g}").realize()
Device[f"gpu"].synchronize()
if __name__ == '__main__':
unittest.main()