From dd0070daab052143437ef240a29c7d77ac8de131 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:25:21 -0400 Subject: [PATCH] Revert "flip Ops.COPY order [pr] (#10120)" (#10121) This reverts commit 984f09ac74d224f67372af44bc565f460d2f3931. --- test/unit/test_tensor_uop_representation.py | 2 +- tinygrad/engine/grouper.py | 10 +++++----- tinygrad/engine/multi.py | 2 +- tinygrad/ops.py | 5 ++--- tinygrad/spec.py | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/test/unit/test_tensor_uop_representation.py b/test/unit/test_tensor_uop_representation.py index fb9245015d..84ad40f14f 100644 --- a/test/unit/test_tensor_uop_representation.py +++ b/test/unit/test_tensor_uop_representation.py @@ -113,7 +113,7 @@ class TestTensorUopRepresentation(unittest.TestCase): a = Tensor([1.,2,3]).realize() c = a.to("TEST") # NOTE: this isn't checked print(c.lazydata) - is_pattern(c, UPat(Ops.COPY, src=(realized_pattern, UPat(Ops.DEVICE)))) + is_pattern(c, UPat(Ops.COPY, src=(UPat(Ops.DEVICE), realized_pattern,))) def test_empty_buf(self): a = Tensor.empty(3, 3) diff --git a/tinygrad/engine/grouper.py b/tinygrad/engine/grouper.py index 34bd95826c..98343257e4 100644 --- a/tinygrad/engine/grouper.py +++ b/tinygrad/engine/grouper.py @@ -60,12 +60,12 @@ sym = symbolic_simple+PatternMatcher([ # split_reduceop (UPat(Ops.REDUCE_AXIS, name="reduce", src=(UPat.var("x"),)), split_reduceop), # COPY(CONST) creates a new CONST on the destination device - (UPat(Ops.COPY, name="root", src=(UPat.cvar("x"), UPat())), lambda root,x: root.const_like(x.arg)), + (UPat(Ops.COPY, name="root", src=(UPat(), UPat.cvar("x"),)), lambda root,x: root.const_like(x.arg)), # no COPY to same device, except clone (arg is True) - (UPat(Ops.COPY, src=(UPat.var("copyin"), UPat()), name="copy"), + (UPat(Ops.COPY, src=(UPat(), UPat.var("copyin")), name="copy"), lambda copyin,copy: copyin if copyin.device == copy.device and copy.arg is not True else None), # store a shrink before COPY, otherwise view after the COPY - (UPat(Ops.COPY, src=(UPat(Ops.VIEW, name="v"), UPat()), name="copy"), lambda copy,v: v.contiguous().copy_to_device(copy.device) \ + (UPat(Ops.COPY, src=(UPat(), UPat(Ops.VIEW, name="v")), name="copy"), lambda copy,v: v.contiguous().copy_to_device(copy.device) \ if prod(v.shape) < prod(v.base.shape) else v.base.copy_to_device(copy.device, clone=copy.arg).view(v.st)), # remove cast to image when it's already a contiguous image (UPat(Ops.CAST, name="cast", src=(UPat(Ops.VIEW, name="vm", src=(UPat(Ops.CONTIGUOUS, name="base"),)),)), @@ -124,7 +124,7 @@ do_realize = PatternMatcher([ # realize before expand or unsafe pad ops (UPat(Ops.VIEW, src=(UPat(GroupOp.All-DONT_PUSH_VIEWS, name="tr"),), name="view"), realize_before_view), # realize before COPY - (UPat(Ops.COPY, src=(UPat(GroupOp.All-DONT_PUSH_VIEWS, name="tr"), UPat())), realize), + (UPat(Ops.COPY, src=(UPat(), UPat(GroupOp.All-DONT_PUSH_VIEWS, name="tr"))), realize), ]) def recursive_group(tr:UOp, st:ShapeTracker, r:UOp, children:defaultdict[UOp, dict[UOp, None]], realizes:dict[UOp, None], @@ -249,7 +249,7 @@ create_kernels = merge_views+PatternMatcher([ (UPat.assign(UPat.var("b"), UPat(GroupOp.All-{Ops.KERNEL}), name="x"), create_kernel), (UPat(Ops.CONTIGUOUS, name="x"), lambda ctx,x: create_kernel(ctx, x, UOp.new_buffer(x.device, x.size, x.dtype))), # create a buffer for COPY on the new device - (UPat(Ops.COPY, src=(UPat(), UPat(Ops.DEVICE, name="d")), name="x"), lambda ctx,d,x: create_kernel(ctx, x, UOp.new_buffer(d.arg, x.size, x.dtype))), + (UPat(Ops.COPY, src=(UPat(Ops.DEVICE, name="d"), UPat()), name="x"), lambda ctx,d,x: create_kernel(ctx, x, UOp.new_buffer(d.arg, x.size, x.dtype))), # otherwise check the context if we're realizing this UOp (UPat(GroupOp.All-DONT_PLACE_IN_KERNEL, name="x"), lambda ctx,x: create_kernel(ctx, x, UOp.new_buffer(x.device, x.size, x.dtype)) if x in ctx.realizes else None), diff --git a/tinygrad/engine/multi.py b/tinygrad/engine/multi.py index 58e3c84c68..e8909df262 100644 --- a/tinygrad/engine/multi.py +++ b/tinygrad/engine/multi.py @@ -152,7 +152,7 @@ multi_pm = PatternMatcher([ (UPat(Ops.SHRINK, src=(UPat(Ops.MULTI, name="multi"), ), name="root"), shrink_multi), (UPat(Ops.FLIP, src=(UPat(Ops.MULTI, name="multi"), ), name="root"), flip_multi), (UPat(Ops.ASSIGN, src=(UPat(Ops.MULTI, name="dest"), UPat(Ops.MULTI, name="src"))), assign_multi), - (UPat(Ops.COPY, src=(UPat(Ops.MULTI, name="multi"), UPat(Ops.DEVICE, name="device"))), copy_multi), + (UPat(Ops.COPY, src=(UPat(Ops.DEVICE, name="device"), UPat(Ops.MULTI, name="multi"), )), copy_multi), (UPat((Ops.CAST, Ops.BITCAST, Ops.CONTIGUOUS, Ops.DETACH, Ops.CONTIGUOUS_BACKWARD, Ops.FUSE), src=(UPat(Ops.MULTI, name="multi"), ), name="root"), passthrough_multi), ]) diff --git a/tinygrad/ops.py b/tinygrad/ops.py index 1d2a5b2ae1..a0f9c78368 100644 --- a/tinygrad/ops.py +++ b/tinygrad/ops.py @@ -490,7 +490,7 @@ class UOp(MathTrait, metaclass=UOpMetaClass): assert op is Ops.BIND, f"unknown op {op}" var, val = arg.unbind() return var.replace(src=(UOp(Ops.VIEW, dtypes.void, (UOp(Ops.DEVICE, arg=device),), ShapeTracker.from_shape(shape)),)).bind(val) - def copy_to_device(self, device:str|tuple[str, ...], clone:bool=False): return UOp(Ops.COPY, self.dtype, (self, UOp(Ops.DEVICE, arg=device)), clone) + def copy_to_device(self, device:str|tuple[str, ...], clone:bool=False): return UOp(Ops.COPY, self.dtype, (UOp(Ops.DEVICE, arg=device), self), clone) def clone(self) -> UOp: return self.copy_to_device(self.device, clone=True) @property def metadata(self) -> tuple[Metadata, ...]|Metadata|None: return self.arg.metadata if self.op is Ops.KERNEL else all_metadata.get(self, None) @@ -532,8 +532,7 @@ class UOp(MathTrait, metaclass=UOpMetaClass): def _device(self) -> Optional[str|tuple[str, ...]]: if self.op is Ops.DEVICE: return self.arg if self.op is Ops.MULTI: return tuple(cast(str, x.device) for x in self.src) - # TODO: it's -1 for copy, copy should support multiple - return dsrcs[-1]._device if len(dsrcs:=[x for x in self.src if x._device is not None]) != 0 else None + return dsrcs[0]._device if len(dsrcs:=[x for x in self.src if x._device is not None]) != 0 else None @property def buf_uop(self) -> UOp: if self.op is Ops.BUFFER: return self diff --git a/tinygrad/spec.py b/tinygrad/spec.py index 53ba4398dc..89318ccf60 100644 --- a/tinygrad/spec.py +++ b/tinygrad/spec.py @@ -78,7 +78,7 @@ tensor_uop_spec = buffer_spec+assign_spec+PatternMatcher([ # COPY # NOTE: the arg here specifies clone=True, which prevents folding same device copy - (UPat(Ops.COPY, name="copy", src=(UPat.var("x"), UPat(Ops.DEVICE))), lambda copy,x: isinstance(copy.arg, bool) and copy.dtype == x.dtype), + (UPat(Ops.COPY, name="copy", src=(UPat(Ops.DEVICE), UPat.var("x"))), lambda copy,x: isinstance(copy.arg, bool) and copy.dtype == x.dtype), ]) # ***** uop type spec *****