From 46e7dfade1c3278b41daf5721dac8aeca44af01f Mon Sep 17 00:00:00 2001
From: George Hotz
Date: Tue, 19 Jul 2022 11:42:14 -0700
Subject: [PATCH] REQUIRES_SIMPLE_REDUCE

---
 tinygrad/llops/ops_gpu.py |  1 +
 tinygrad/ops.py           | 10 +++++++++-
 tinygrad/tensor.py        | 12 ++++--------
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/tinygrad/llops/ops_gpu.py b/tinygrad/llops/ops_gpu.py
index 2205c73806..a6c168ec2e 100644
--- a/tinygrad/llops/ops_gpu.py
+++ b/tinygrad/llops/ops_gpu.py
@@ -112,6 +112,7 @@ class GPUBuffer:
   def movement_op(x, op:MovementOps, arg) -> GPUBuffer: return type(x)(ShapeTracker(x.st).movement_op(op, arg), x)
 
   def reduce_op(x, op:ReduceOps, new_shape:Tuple[int, ...]): return type(x)(new_shape)._processing_op([("A", x)], code="acc", earlycode=GPUBuffer.code_for_op[op], earlybufs=set("A"), start=GPUBuffer.start_for_op[op])
+  #REQUIRES_SIMPLE_REDUCE = True
 
   def _processing_op(ret, bufs: List[Tuple[str, GPUBuffer]]=[], code:str="acc", C:Optional[ConvArgs]=None, start="0.0", reduce_shape=None, earlybufs:Set[str]=set(), earlycode:str="acc") -> GPUBuffer:
     assert C is None
diff --git a/tinygrad/ops.py b/tinygrad/ops.py
index 74dbde0cec..c3d8508d84 100644
--- a/tinygrad/ops.py
+++ b/tinygrad/ops.py
@@ -245,8 +245,16 @@ class LazyBuffer:
   def binary_op(x:LazyBuffer, op:BinaryOps, y:LazyBuffer) -> LazyBuffer: return elementwise_op(op, x, y)
   def contiguous_op(x:LazyBuffer) -> LazyBuffer: return x if x.st.contiguous else x.unary_op(UnaryOps.NOOP)
 
+  # TODO: permute to put all the reduce axis at the end
   def reduce_op(x:LazyBuffer, op:ReduceOps, new_shape:Tuple[int, ...]) -> LazyBuffer:
-    return LazyBuffer(x.device, tuple(new_shape), ReduceOps, LazyOp(op, (x,), tuple(new_shape))) if x.shape != tuple(new_shape) else x
+    if x.shape == tuple(new_shape): return x
+    if getattr(x.dbuffer, "REQUIRES_SIMPLE_REDUCE", False) and (len(new_shape) != 2 or new_shape[1] != 1):
+      num, red = prod([s for s,n in zip(x.shape, new_shape) if n != 1]), prod([s for s,n in zip(x.shape, new_shape) if n == 1])
+      x = x.movement_op(MovementOps.PERMUTE, [i for i,n in enumerate(new_shape) if n != 1] + [i for i,n in enumerate(new_shape) if n == 1])
+      x = x.movement_op(MovementOps.RESHAPE, (num, red))
+      return x.reduce_op(op, (num, 1)).movement_op(MovementOps.RESHAPE, new_shape)
+    else:
+      return LazyBuffer(x.device, tuple(new_shape), ReduceOps, LazyOp(op, (x,), tuple(new_shape)))
 
   # syntactic sugar around PAD and SHRINK
   # TODO: turn RESHAPE into EXPAND and CONTRACT (current EXPAND should be REPEAT)
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 9b4454f4cd..ce0bd5c4d7 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -23,10 +23,8 @@ class Tensor:
     if isinstance(data, np.ndarray):
       if data.shape == tuple(): data = data.reshape((1,))
       self.lazydata = LazyBuffer.fromCPU(data.astype(np.float32), device)
-    elif isinstance(data, LazyBuffer):
-      self.lazydata = data
-    else:
-      raise Exception(f"can't create Tensor from {data}")
+    elif isinstance(data, LazyBuffer): self.lazydata = data
+    else: raise Exception(f"can't create Tensor from {data}")
 
     # tensors have gradients, buffers do not
     self.grad : Optional[Tensor] = None
@@ -55,8 +53,7 @@ class Tensor:
     return self
 
   def assign(self, x):
-    if not isinstance(x, Tensor):
-      x = Tensor(x)
+    if not isinstance(x, Tensor): x = Tensor(x)
     assert self.shape == x.shape
     self.lazydata = x.lazydata
     return x
@@ -127,8 +124,7 @@ class Tensor:
         for g in ([grads] if len(t0._ctx.parents) == 1 else grads)]
       for t, g in zip(t0._ctx.parents, grads):
        if g is not None and t.requires_grad:
-          assert g.shape == t.shape, \
-            f"grad shape must match tensor shape in {self._ctx!r}, {g.shape!r} != {t.shape!r}"
+          assert g.shape == t.shape, f"grad shape must match tensor shape in {self._ctx!r}, {g.shape!r} != {t.shape!r}"
          t.grad = g if t.grad is None else (t.grad + g)
 
   # ***** non first class ops (hlops) *****
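
The ops.py hunk is the core of the change: when a backend advertises REQUIRES_SIMPLE_REDUCE, reduce_op lowers every reduce into a canonical 2D form by permuting the reduced axes to the end, reshaping to (num, red), reducing to (num, 1), and reshaping back to new_shape, so the backend only ever sees reduces of the shape (num, red) -> (num, 1). A minimal NumPy sketch of that transformation (illustrative only, not part of the patch; simple_reduce_sum and the prod helper here are hypothetical names):

import operator
from functools import reduce
import numpy as np

def prod(xs): return reduce(operator.mul, xs, 1)

def simple_reduce_sum(x: np.ndarray, new_shape):
  # same grouping as the patch: axes kept in new_shape (n != 1) first, reduced axes (n == 1) last
  keep = [i for i, n in enumerate(new_shape) if n != 1]
  red  = [i for i, n in enumerate(new_shape) if n == 1]
  num, rdc = prod([x.shape[i] for i in keep]), prod([x.shape[i] for i in red])
  y = x.transpose(keep + red).reshape(num, rdc)            # PERMUTE then RESHAPE to the "simple" 2D form
  return y.sum(axis=1, keepdims=True).reshape(new_shape)   # reduce (num, red) -> (num, 1), RESHAPE back

a = np.arange(24.0).reshape(2, 3, 4)
assert np.allclose(simple_reduce_sum(a, (2, 1, 4)), a.sum(axis=1, keepdims=True))

The flag itself is left commented out in ops_gpu.py, so the GPU backend keeps taking the general LazyOp path; enabling it would route GPU reduces through the 2D rewrite above.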