From 60590cf8b540bfc2d291d2dc5a3080f70e6d9f97 Mon Sep 17 00:00:00 2001 From: Roelof van Dijk <3604013+roelofvandijk@users.noreply.github.com> Date: Sun, 3 Sep 2023 02:43:29 +0200 Subject: [PATCH] perf: create buffer only when needed (#1684) Co-authored-by: Roelof van Dijk --- tinygrad/lazy.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tinygrad/lazy.py b/tinygrad/lazy.py index bdaa4b4860..a9498be19d 100644 --- a/tinygrad/lazy.py +++ b/tinygrad/lazy.py @@ -225,13 +225,12 @@ class LazyBuffer: def shuffle_and_prune_movement_ops(self, st: ShapeTracker, op: MovementOps, arg: Union[Tuple[Union[Node,int], ...], Tuple[Tuple[int, int], ...]]) -> LazyBuffer: if SHUFFLE_MOVEMENT_OPS and self.optype == BinaryOps and not self.realized and (op in {MovementOps.SHRINK, MovementOps.STRIDE, MovementOps.PERMUTE} or (op == MovementOps.RESHAPE and self.op.op in UnaryOps)) and not self.children: return self.op.replace_with_movement_ops([(op, arg)]) - ret = create_lazybuffer(self.device, st, MovementOps, LazyOp(op, (self,), arg), self.dtype) - if REMOVE_MOVEMENT_NOPS and not self.realized and not ret.realized and ret.st.contiguous: + if REMOVE_MOVEMENT_NOPS and not self.realized and st.contiguous: # MovementOps aren't stacked any more, they each have one parent, find the root root = get_movementroot(self) - if root.st.contiguous and root != self and prod(ret.st.shape) == prod(root.shape): - return root.reshape(ret.st.shape) - return ret + if root.st.contiguous and root != self and prod(st.shape) == prod(root.shape): + return root.reshape(st.shape) + return create_lazybuffer(self.device, st, MovementOps, LazyOp(op, (self,), arg), self.dtype) def _reduce_op(self:LazyBuffer, op:ReduceOps, new_shape:Tuple[int, ...]) -> LazyBuffer: if self.shape == tuple(new_shape): return self