better function names

2026-01-23 22:08:08 -05:00 · 2023-03-04 18:27:37 -08:00
parent a77d792aff
commit f281f707bd
2 changed files with 9 additions and 7 deletions
--- a/tinygrad/codegen/ast.py
+++ b/tinygrad/codegen/ast.py
@@ -1,7 +1,7 @@
 from enum import Enum, auto
 import itertools
 from typing import List, Tuple, Optional
-from tinygrad.helpers import prod, dedup, all_same
+from tinygrad.helpers import prod, dedup, all_same, colored
 from tinygrad.ops import LazyOp, MovementOps, get_lazyop_info, get_buffers, ReduceOps, get_lazyops
 from tinygrad.shape import ShapeTracker, View, strides_for_shape

@@ -128,6 +128,11 @@ class ASTKernel:
  @property
  def upcast_in_mid_reduce_axes(self): return [j for j in range(self.first_reduce, self.first_reduce+len(self.group_for_reduce)) if self.full_shape[j] == self.sts[0].shape[j]]

+  def colorshape(self, pad=50) -> str:  # returns one colored, space-separated entry per axis, followed by spaces that bring the uncolored text up to `pad` characters
+    axis = [(f"{rs:4d}", (("green" if i in self.upcast_in_mid_reduce_axes else "cyan") if i < self.first_reduce + len(self.group_for_reduce) else "red") if i >= self.first_reduce else "blue") for i, rs in enumerate(self.full_shape)]  # (text, color) per full_shape axis: blue below first_reduce; green (if in upcast_in_mid_reduce_axes) or cyan for the next len(group_for_reduce) axes; red for the rest
+    axis += [(f"{s:4d}", 'magenta' if reduce else 'yellow') for s, _, reduce in self.buftokens[self.full_buf_index].axis[::-1]]  # then the axis entries of buftokens[full_buf_index], reversed: magenta when the entry's third field is truthy, else yellow
+    return ' '.join([colored(*x) for x in axis])+(" "*(pad-len(' '.join([x[0] for x in axis]))))  # pad width is measured on the plain text (x[0]), not on colored()'s output; no padding is added once the text is already >= pad wide
+
  def simplify_ones(self):
    # remove places where the shape is all ones
    # TODO: this should be factored in to multi shape stride
--- a/tinygrad/codegen/gpu.py
+++ b/tinygrad/codegen/gpu.py
@@ -5,7 +5,7 @@ from tinygrad.ops import UnaryOps, BinaryOps, ReduceOps, LazyOp, Op, ASTRunner
 from tinygrad.codegen.ast import ASTKernel, Token, Types
 from tinygrad.shape.symbolic import Node, MulNode, DivNode, SumNode, Variable, render_python
 from tinygrad.shape import ShapeTracker, View
-from tinygrad.helpers import getenv, DEBUG, prod, partition, colored, mnum, all_same
+from tinygrad.helpers import getenv, DEBUG, prod, partition, mnum, all_same

 # div is different in cl than python
 render_cl = render_python.copy()
@@ -228,10 +228,7 @@ class GPUCodegen(ASTKernel):
    self.hand_coded_optimizations()

    # fancy colored shape printer
-    if DEBUG >= 3:
-      axis = [(f"{rs:4d}", (("green" if i in self.upcast_in_mid_reduce_axes else "cyan") if i < self.first_reduce + len(self.group_for_reduce) else "red") if i >= self.first_reduce else "blue") for i, rs in enumerate(self.full_shape)]
-      axis += [(f"{s:4d}", 'magenta' if reduce else 'yellow') for s, _, reduce in self.buftokens[self.full_buf_index].axis[::-1]]
-      print(' '.join([colored(*x) for x in axis])+(" "*(50-len(' '.join([x[0] for x in axis])))), end="")
+    if DEBUG >= 3: print(self.colorshape(), end="")

    # add a local buffer for multistage reduce
    if len(self.group_for_reduce):
@@ -322,7 +319,7 @@ class GPUCodegen(ASTKernel):
      [") {\n"] + self.kernel)

    # kernel function definition
-    function_name = ("re_S" if self.reduceop else "ew_S") + '_'.join([str(x) for x in self.bufs[0].shape if x != 1])
+    function_name = ("re_S" if self.reduceop else "ew_S") + '_'.join([str(x) for x in self.full_shape])

    # painfully name the function
    if prg in GPUCodegen.kernel_name_cache: