mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 15:38:29 -05:00
@@ -5,6 +5,7 @@ from tinygrad.device import Device, Buffer
|
||||
from tinygrad.tensor import Tensor, _to_np_dtype
|
||||
from tinygrad.helpers import Context, CI, dedup, from_mv
|
||||
from tinygrad.dtype import dtypes
|
||||
from tinygrad.engine.jit import MultiGraphRunner
|
||||
from tinygrad.engine.realize import ExecItem, BufferXfer, get_runner, CompiledRunner
|
||||
|
||||
np.random.seed(1337)
|
||||
@@ -106,8 +107,9 @@ class TestGraph(unittest.TestCase):
|
||||
|
||||
helper_test_graphs(Device[d0].graph, graphs)
|
||||
|
||||
@unittest.skipUnless(Device.DEFAULT in {"CUDA", "NV", "AMD"}, "mutidevice graph required")
|
||||
def test_order_copy_writed(self):
|
||||
if not issubclass(Device[Device.DEFAULT].graph, MultiGraphRunner): self.skipTest("graph does not supported (not MultiGraphRunner)")
|
||||
|
||||
d0 = Device.DEFAULT
|
||||
b0 = [helper_alloc_rawbuffer(d0, fill=True) for _ in range(4)]
|
||||
|
||||
@@ -117,8 +119,9 @@ class TestGraph(unittest.TestCase):
|
||||
|
||||
helper_test_graphs(Device[d0].graph, graphs)
|
||||
|
||||
@unittest.skipUnless(Device.DEFAULT in {"CUDA", "NV", "AMD"}, "mutidevice graph required")
|
||||
def test_order_copy_then_read(self):
|
||||
if not issubclass(Device[Device.DEFAULT].graph, MultiGraphRunner): self.skipTest("graph does not supported (not MultiGraphRunner)")
|
||||
|
||||
d0 = Device.DEFAULT
|
||||
b0 = [helper_alloc_rawbuffer(d0, fill=True) for _ in range(4)]
|
||||
|
||||
@@ -147,8 +150,9 @@ class TestGraph(unittest.TestCase):
|
||||
|
||||
helper_test_graphs(Device[d0].graph, graphs)
|
||||
|
||||
@unittest.skipUnless(Device.DEFAULT in {"CUDA", "NV", "AMD"}, "mutidevice graph required")
|
||||
def test_copies_2_devs(self):
|
||||
if not issubclass(Device[Device.DEFAULT].graph, MultiGraphRunner): self.skipTest("graph does not supported (not MultiGraphRunner)")
|
||||
|
||||
d0, d1 = Device.DEFAULT, f"{Device.DEFAULT}:1"
|
||||
b0 = [helper_alloc_rawbuffer(d0, fill=True) for _ in range(3)]
|
||||
b1 = [helper_alloc_rawbuffer(d1, fill=True) for _ in range(1)]
|
||||
@@ -159,8 +163,9 @@ class TestGraph(unittest.TestCase):
|
||||
|
||||
helper_test_graphs(Device[d0].graph, graphs)
|
||||
|
||||
@unittest.skipUnless(Device.DEFAULT in {"CUDA", "NV", "AMD"}, "mutidevice graph required")
|
||||
def test_copies_after_graph_global(self):
|
||||
if not issubclass(Device[Device.DEFAULT].graph, MultiGraphRunner): self.skipTest("graph does not supported (not MultiGraphRunner)")
|
||||
|
||||
d0, d1, d2, d3 = Device.DEFAULT, f"{Device.DEFAULT}:1", f"{Device.DEFAULT}:2", f"{Device.DEFAULT}:3"
|
||||
b0 = [helper_alloc_rawbuffer(d0, fill=True) for _ in range(8)]
|
||||
b1 = [helper_alloc_rawbuffer(d1, fill=True) for _ in range(6)]
|
||||
@@ -206,8 +211,9 @@ class TestGraph(unittest.TestCase):
|
||||
|
||||
helper_test_graphs(Device[d0].graph, graphs)
|
||||
|
||||
@unittest.skipUnless(Device.DEFAULT in {"CUDA", "NV", "AMD"}, "mutidevice graph required")
|
||||
def test_graph_after_copies_devs(self):
|
||||
if not issubclass(Device[Device.DEFAULT].graph, MultiGraphRunner): self.skipTest("graph does not supported (not MultiGraphRunner)")
|
||||
|
||||
d0, d1, d2, d3 = Device.DEFAULT, f"{Device.DEFAULT}:1", f"{Device.DEFAULT}:2", f"{Device.DEFAULT}:3"
|
||||
b0 = [helper_alloc_rawbuffer(d0, fill=True) for _ in range(8)]
|
||||
b1 = [helper_alloc_rawbuffer(d1, fill=True) for _ in range(1)]
|
||||
@@ -234,6 +240,8 @@ class TestGraph(unittest.TestCase):
|
||||
helper_test_graphs(Device[d0].graph, graphs)
|
||||
|
||||
def test_graph_offset_bufs(self):
|
||||
if not issubclass(Device[Device.DEFAULT].graph, MultiGraphRunner): self.skipTest("graph does not supported (not MultiGraphRunner)")
|
||||
|
||||
d0 = Device.DEFAULT
|
||||
if not hasattr(Device[d0].allocator, "_offset"): self.skipTest("device does not support _offset")
|
||||
|
||||
|
||||
@@ -362,7 +362,7 @@ class TestHCQ(unittest.TestCase):
|
||||
|
||||
gb_s = ((SZ / 1e9) / et_ms) * 1e3
|
||||
print(f"cross device copy: {et_ms:.2f} ms, {gb_s:.2f} GB/s")
|
||||
assert (0.2 if MOCKGPU else 2) <= gb_s <= 50
|
||||
assert (0.2 if MOCKGPU else 2) <= gb_s <= 100
|
||||
|
||||
def test_timeline_signal_rollover(self):
|
||||
for queue_type in [TestHCQ.d0.hw_compute_queue_t, TestHCQ.d0.hw_copy_queue_t]:
|
||||
|
||||
Reference in New Issue
Block a user