From 60ffe00172887ea085bea5c9fe74bc0eb17c2085 Mon Sep 17 00:00:00 2001
From: chenyu
Date: Wed, 16 Jul 2025 18:30:14 -0400
Subject: [PATCH] remove Kernel.first_reduce [pr] (#11269)

---
 tinygrad/opt/heuristic.py | 15 ++++++++-------
 tinygrad/opt/kernel.py    |  3 ---
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/tinygrad/opt/heuristic.py b/tinygrad/opt/heuristic.py
index ff55533f5e..13f259c073 100644
--- a/tinygrad/opt/heuristic.py
+++ b/tinygrad/opt/heuristic.py
@@ -16,11 +16,12 @@ def hand_coded_optimizations(k:Kernel) -> list[Opt]:
     st0, st1 = k.sts[k.bufs.index(mulop.src[0])], k.sts[k.bufs.index(mulop.src[1])]
     strides0, strides1 = st0.real_strides(), st1.real_strides()
     def has_expanded_axis(shape, strides): return any(resolve(s > 1) and not resolve(st != 0) for s,st in zip(shape,strides))
-    if strides0[k.first_reduce] == 1 and not (has_expanded_axis(st0.shape, strides0) and has_expanded_axis(st1.shape, strides1)):
+    if strides0[first_reduce:=(k.axes_of(AxisType.REDUCE)[0])] == 1 and \
+      not (has_expanded_axis(st0.shape, strides0) and has_expanded_axis(st1.shape, strides1)):
       for global_idx in k.axes_of(AxisType.GLOBAL):
-        if k.full_shape[k.first_reduce]%MV_THREADS_PER_ROW == 0 and k.full_shape[global_idx]%(MV_BLOCKSIZE*MV_ROWS_PER_THREAD) == 0:
+        if k.full_shape[first_reduce]%MV_THREADS_PER_ROW == 0 and k.full_shape[global_idx]%(MV_BLOCKSIZE*MV_ROWS_PER_THREAD) == 0:
           if DEBUG >= 3:
-            print(f"MATVEC: {k.full_shape=} {k.first_reduce=} {strides0=} {MV_BLOCKSIZE=} {MV_THREADS_PER_ROW=} {MV_ROWS_PER_THREAD=}")
+            print(f"MATVEC: {k.full_shape=} {first_reduce=} {strides0=} {MV_BLOCKSIZE=} {MV_THREADS_PER_ROW=} {MV_ROWS_PER_THREAD=}")
           if MV_THREADS_PER_ROW > 1: k.apply_opt(Opt(OptOps.GROUP, 0, MV_THREADS_PER_ROW))
           if MV_BLOCKSIZE > 1: k.apply_opt(Opt(OptOps.LOCAL, global_idx, MV_BLOCKSIZE))
           if MV_ROWS_PER_THREAD > 1: k.apply_opt(Opt(OptOps.UPCAST, global_idx, MV_ROWS_PER_THREAD))
@@ -41,7 +42,7 @@ def hand_coded_optimizations(k:Kernel) -> list[Opt]:
     if (axis:=unit_stride_axes_mul_4[0]) in k.upcastable_dims:
       k.apply_opt(Opt(OptOps.UPCAST, axis, 4))
     elif axis in k.unrollable_dims:
-      k.apply_opt(Opt(OptOps.UNROLL, axis-k.first_reduce, 4))
+      k.apply_opt(Opt(OptOps.UNROLL, k.unrollable_dims.index(axis), 4))
 
   # no more opt if we are grouping
   if k.group_for_reduces: return k.applied_opts
@@ -82,14 +83,14 @@ def hand_coded_optimizations(k:Kernel) -> list[Opt]:
   upcast_size = prod(k.full_shape[a] for a in k.axes_of(AxisType.UPCAST, AxisType.UNROLL))
   if k.unrollable_dims and (upcast_size <= 4 or not k.axes_of(AxisType.UNROLL)) and (upcast_size < 64):
     if (s:=k.full_shape[k.unrollable_dims[-1]]) <= 32:
-      k.apply_opt(Opt(OptOps.UNROLL, k.unrollable_dims[-1]-k.first_reduce, 0))
+      k.apply_opt(Opt(OptOps.UNROLL, len(k.unrollable_dims)-1, 0))
       # if it's small, upcast a second reduce dimension too
       if k.unrollable_dims and s <= 3 and k.full_shape[k.unrollable_dims[-1]] <= 3:
-        k.apply_opt(Opt(OptOps.UNROLL, k.unrollable_dims[-1]-k.first_reduce, 0))
+        k.apply_opt(Opt(OptOps.UNROLL, len(k.unrollable_dims)-1, 0))
     else:
       for splits in [4]:
         if k.full_shape[axis:=k.unrollable_dims[-1]]%splits == 0:
-          k.apply_opt(Opt(OptOps.UNROLL, axis-k.first_reduce, splits))
+          k.apply_opt(Opt(OptOps.UNROLL, len(k.unrollable_dims)-1, splits))
           break
 
   # if nothing at all is upcasted and it's easy to, do an upcast
diff --git a/tinygrad/opt/kernel.py b/tinygrad/opt/kernel.py
index 899bd78881..bfcaf555a4 100644
--- a/tinygrad/opt/kernel.py
+++ b/tinygrad/opt/kernel.py
@@ -114,9 +114,6 @@ class Kernel:
 
     return ret
 
-  @property
-  def first_reduce(self) -> int: return self.axes_of(AxisType.GROUP_REDUCE, AxisType.REDUCE)[0]
-
   @property
   def reduceop(self) -> UOp|None: return self.reduceops[0] if len(self.reduceops) > 0 else None
   @property
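
Why the UNROLL arguments can change from axis offsets to list positions: UNROLL's
axis argument indexes into the unrollable dims, and when those dims are a
contiguous run of axes starting at the first reduce axis, an axis's offset from
first_reduce equals its position in that list. Below is a minimal standalone
sketch of that equivalence, not tinygrad code; AxisType, axes_of, and
unrollable_dims are illustrative stand-ins for the Kernel attributes the patch
touches, and the contiguity of the reduce axes is an assumption of the example.

# sketch of the index translation behind this patch (illustrative, not tinygrad)
from enum import Enum, auto

class AxisType(Enum):
  GLOBAL = auto()
  REDUCE = auto()

# assumed example layout: two global axes followed by three reduce axes
axis_types = [AxisType.GLOBAL, AxisType.GLOBAL, AxisType.REDUCE, AxisType.REDUCE, AxisType.REDUCE]

def axes_of(*types:AxisType) -> list[int]: return [i for i,t in enumerate(axis_types) if t in types]

unrollable_dims = axes_of(AxisType.REDUCE)  # [2, 3, 4]
first_reduce = axes_of(AxisType.REDUCE)[0]  # 2, the inlined replacement for k.first_reduce

# old form axis-first_reduce agrees with new form unrollable_dims.index(axis)
for axis in unrollable_dims:
  assert axis - first_reduce == unrollable_dims.index(axis)

# the opts that always unroll the last unrollable dim can likewise use
# len(unrollable_dims)-1 in place of unrollable_dims[-1]-k.first_reduce
assert unrollable_dims[-1] - first_reduce == len(unrollable_dims) - 1

One visible subtlety: the deleted property counted GROUP_REDUCE axes as well
(axes_of(AxisType.GROUP_REDUCE, AxisType.REDUCE)[0]), while the inlined matvec
lookup uses only AxisType.REDUCE; the index-based UNROLL arguments avoid
depending on that offset bookkeeping altogether.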