From 76d465dbc35fc1f47c3d19c8e2f4f6652a891084 Mon Sep 17 00:00:00 2001 From: Nino Risteski <95188570+NinoRisteski@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:28:36 +0100 Subject: [PATCH] optim empty shard #13513 (#13598) * optim empty shard * remove tuple * simplify * lint * lint2 * test * remove original buffer unique id * new rule * reset shard * update * reset shard --- test/test_multitensor.py | 6 ++++++ tinygrad/schedule/multi.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/test/test_multitensor.py b/test/test_multitensor.py index db7bc1fd15..3ad7b36ca9 100644 --- a/test/test_multitensor.py +++ b/test/test_multitensor.py @@ -57,6 +57,12 @@ class TestMultiTensor(unittest.TestCase): assert lb.shape == (128,) (X + X).realize() + def test_shard_empty(self): + GlobalCounters.reset() + X = Tensor.empty(256).shard(devices_2, 0).realize() + assert GlobalCounters.kernel_count == 0 + (X + X).realize() + def _test_shard_op(self, op, out, n=4): t = Tensor.ones(n).contiguous().realize().shard(devices_2, 0) r = op(t).realize() diff --git a/tinygrad/schedule/multi.py b/tinygrad/schedule/multi.py index fe769996bc..769e4784ab 100644 --- a/tinygrad/schedule/multi.py +++ b/tinygrad/schedule/multi.py @@ -102,6 +102,8 @@ def mstack_early_shrink(ms:UOp, shrink:UOp): replace_allreduce = PatternMatcher([ (UPat(Ops.ALLREDUCE, src=(UPat.var("buf"), UPat()), name="red"), handle_allreduce_multirank), (UPat(Ops.ALLREDUCE, src=(UPat.var("buf"), UPat()), name="red"), handle_allreduce), + (UPat(Ops.COPY, src=(UPat(Ops.BUFFER, name="buf"), UPat(Ops.DEVICE, name="dev"))),lambda buf,dev: UOp.new_buffer(dev.arg, buf.arg, buf.dtype) + if buf.device not in {"DISK", "NPY"} and isinstance(dev.arg, tuple) and isinstance(buf.device, str) else None), # BROADCAST: explicitly expand broadcast copies and combine with MSTACK (UPat(Ops.COPY, name="c", src=(UPat(GroupOp.All-{Ops.CONST}, name="x"), UPat(Ops.DEVICE))), lambda c,x: UOp(Ops.MSTACK, c.dtype, tuple(x.copy_to_device(d) for d in c.device)) if isinstance(c.device, tuple) and isinstance(x.device, str) else None),