opt transforms the ast into an optimized ast (#10900)

* opt transforms the ast into an optimized ast

* fix get_kernel order and to_function_name

* function_name property

* update docs

* copy from kernel.py

* improve docs

* ci didn't trigger?
Author: George Hotz
Date: 2025-06-22 09:41:26 -07:00
Committed by: GitHub
parent ffddf165f8
commit b09c47366f
8 changed files with 96 additions and 24 deletions


@@ -1,6 +1,8 @@
 # tinygrad directory layout
-Listed in order of how they are processed
+This explains the flow of a big graph down to programs.
+Directories are listed in order of how they are processed.
 
 ---
@@ -20,7 +22,11 @@ Group UOps into kernels.
 Transforms the ast into an optimized ast. This is where BEAM search and heuristics live.
-When finished, this will just have a function that takes in the ast and returns the optimized ast.
+::: tinygrad.opt.get_optimized_ast
+    options:
+        members: false
+        show_labels: false
+        show_source: false
 
 ---
@@ -38,10 +44,23 @@ Transform the optimized ast into a linearized list of UOps.
 ## tinygrad/renderer
-Transform the linearized list of UOps into a program.
+Transform the linearized list of UOps into a program, represented as a string.
+::: tinygrad.renderer.Renderer
+    options:
+        members:
+        - render
+        show_labels: false
+        show_source: false
 
 ---
 
 ## tinygrad/engine
 Abstracted high level interface to the runtimes.
+::: tinygrad.engine.realize.get_program
+    options:
+        members: false
+        show_labels: false
+        show_source: false
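For a concrete sense of how the documented stages chain together, here is a minimal sketch that runs them by hand. It assumes a kernel AST obtained via Tensor.schedule() and the default device's renderer; it is illustrative, not the canonical entry point (that is get_program, below).

```python
# sketch: run the documented pipeline stages one by one (assumes an Ops.SINK kernel ast)
from tinygrad import Tensor, Device
from tinygrad.opt import get_optimized_ast   # tinygrad/opt: heuristics / BEAM
from tinygrad.codegen import full_rewrite    # tinygrad/codegen: lower to a linear UOp list

ast = (Tensor.rand(16, 16) @ Tensor.rand(16, 16)).schedule()[-1].ast
renderer = Device[Device.DEFAULT].renderer

opt_ast = get_optimized_ast(ast, renderer)   # optimized ast, KernelInfo in .arg
uops = full_rewrite(opt_ast, renderer)       # linearized UOps, ending in Ops.SINK
print(renderer.render(uops))                 # tinygrad/renderer: UOps -> source string
```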


@@ -1,29 +1,51 @@
 from typing import Optional, cast, Generator
 import time, pprint
 from dataclasses import dataclass, replace, field
-from tinygrad.helpers import all_same, colored, getenv, DEBUG, GlobalCounters, ansilen, BEAM, NOOPT, all_int, CAPTURING, Metadata, TRACEMETA
-from tinygrad.helpers import DEVECTORIZE, time_to_str, VALIDATE_WITH_CPU
-from tinygrad.uop.ops import Ops, PatternMatcher, UOp, UPat, Variable, sym_infer
+from tinygrad.helpers import all_same, colored, DEBUG, GlobalCounters, ansilen, BEAM, NOOPT, all_int, CAPTURING, Metadata, TRACEMETA
+from tinygrad.helpers import DEVECTORIZE, time_to_str, VALIDATE_WITH_CPU, getenv
+from tinygrad.uop.ops import Ops, PatternMatcher, UOp, UPat, Variable, sym_infer, graph_rewrite, print_uops, track_rewrites
 from tinygrad.device import Device, Buffer
 from tinygrad.renderer import Renderer, ProgramSpec, Estimates
-from tinygrad.opt.kernel import Kernel
-from tinygrad.opt.heuristic import hand_coded_optimizations
 from tinygrad.engine.schedule import ScheduleItem
+from tinygrad.opt import get_optimized_ast
+from tinygrad.codegen import full_rewrite
+from tinygrad.uop.spec import type_verify
 
 # **************** Program Creation ****************
 
-logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
+@track_rewrites(name=lambda _ast,_renderer,ret:ret.name)
 def get_program(ast:UOp, renderer:Renderer) -> ProgramSpec:
-  k = Kernel(ast, opts=renderer)
-  if not NOOPT:
-    if not k.apply_tensor_cores(getenv("TC", 1)): k.apply_opts(hand_coded_optimizations(k))
-    if BEAM >= 1:
-      from tinygrad.opt.search import beam_search, bufs_from_lin
-      kb = Kernel(ast, opts=renderer)
-      rawbufs = bufs_from_lin(kb, allocate=False)
-      k = beam_search(kb, rawbufs, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))
-  if logkerns is not None: logkerns.writelines([f"{(k.ast, k.applied_opts)}\n"])
-  return k.to_program()
+  """
+  Transform an AST into a ProgramSpec. May trigger BEAM search.
+
+  Args:
+    ast: The Ops.SINK rooted AST
+    renderer: The renderer used to generate the code
+
+  Returns:
+    The ProgramSpec of the program.
+  """
+  if getenv("VIZ"): graph_rewrite(ast, PatternMatcher([]), name="View Base AST")
+  modified_ast = get_optimized_ast(ast, renderer) if ast.arg is None else ast
+  if __debug__: type_verify(list(modified_ast.toposort()))
+
+  # linearize
+  try:
+    uops = full_rewrite(modified_ast, renderer)
+  except RuntimeError:
+    print("***** LINEARIZE FAILURE *****")
+    print(f"ast = {ast}")
+    print(f"opts = {modified_ast.arg.applied_opts}")
+    raise
+  assert uops[-1].op is Ops.SINK, "last uop must be sink"
+
+  # print and render
+  if DEBUG >= 6: print_uops(uops)
+  src = renderer.render(uops)
+  return ProgramSpec(uops[-1].arg.name, src, renderer.device, ast, uops,
+    global_size=[1,1,1] if renderer.has_local else None, local_size=[1,1,1] if renderer.has_local else None)
 
 # **************** Runners ****************
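After this change get_program is a thin pipeline: optimize (skipped when the SINK already carries a KernelInfo arg), verify, linearize, render. A hedged usage sketch, with the same assumption as above about where the ast comes from:

```python
from tinygrad import Tensor, Device
from tinygrad.engine.realize import get_program

ast = (Tensor.rand(16, 16) @ Tensor.rand(16, 16)).schedule()[-1].ast
prg = get_program(ast, Device[Device.DEFAULT].renderer)  # runs BEAM search if BEAM>=1

print(prg.name)  # kernel name taken from the SINK's KernelInfo
print(prg.src)   # rendered program source
```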

tinygrad/opt/__init__.py (new file)

@@ -0,0 +1,29 @@
+# opt opinionatedly transforms an ast into an optimized ast using either heuristics or beam search
+from tinygrad.opt.kernel import Kernel
+from tinygrad.opt.heuristic import hand_coded_optimizations
+from tinygrad.uop.ops import UOp
+from tinygrad.helpers import NOOPT, BEAM, getenv
+from tinygrad.renderer import Renderer
+
+def get_optimized_ast(ast:UOp, renderer:Renderer) -> UOp:
+  """
+  Optimize an AST based on heuristics or BEAM search.
+
+  Args:
+    ast: The Ops.SINK rooted AST
+    renderer: The renderer used to generate the code
+
+  Returns:
+    The Ops.SINK rooted AST transformed to apply the opts and with a KernelInfo in the arg.
+  """
+  k = Kernel(ast, opts=renderer)
+  if not NOOPT:
+    if not k.apply_tensor_cores(getenv("TC", 1)): k.apply_opts(hand_coded_optimizations(k))
+    if BEAM >= 1:
+      from tinygrad.opt.search import beam_search, bufs_from_lin
+      kb = Kernel(ast, opts=renderer)
+      rawbufs = bufs_from_lin(kb, allocate=False)
+      k = beam_search(kb, rawbufs, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))
+  return k.get_optimized_ast()
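Since the returned SINK now carries its KernelInfo in arg, the opts that the heuristics (or BEAM) picked are directly inspectable. A small sketch, under the same assumptions as above:

```python
from tinygrad import Tensor, Device
from tinygrad.opt import get_optimized_ast

ast = (Tensor.rand(64, 64) @ Tensor.rand(64, 64)).schedule()[-1].ast
opt_ast = get_optimized_ast(ast, Device[Device.DEFAULT].renderer)

ki = opt_ast.arg        # the KernelInfo attached to the SINK
print(ki.name)          # pretty (possibly colored) kernel name
print(ki.applied_opts)  # tuple of the Opt steps that were applied
```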


@@ -457,7 +457,7 @@ class Kernel:
       # otherwise we just replace the VIEW source
       return ret.replace(src=(ret.src[0].replace(arg=st),)+ret.src[1:])
     if op.op is Ops.SINK:
-      return ret.replace(arg = KernelInfo(to_function_name(self.name) if name_override is None else name_override,
+      return ret.replace(arg = KernelInfo(self.name if name_override is None else name_override,
                                           self.local_dims, self.upcasted, self.dont_use_locals, tuple(self.applied_opts)))
     if op.op is Ops.REDUCE_AXIS:
       reduce_idx = len(self.bufs) + self.reduceops.index(op) * 2


@@ -135,7 +135,7 @@ class CStyleLanguage(Renderer):
     name = "test"
     for u in uops:
       if u.op is Ops.SINK:
-        if u.arg is not None: name = u.arg.name
+        if u.arg is not None: name = u.arg.function_name
         continue
       if u.op in (Ops.DEFINE_GLOBAL, Ops.DEFINE_VAR):
         r[u] = f"data{u.arg}" if u.op is Ops.DEFINE_GLOBAL else u.arg[0]


@@ -166,7 +166,7 @@ class LLVMRenderer(Renderer):
     name = "test"
     for u in uops:
       if u.op is Ops.SINK:
-        if u.arg is not None: name = u.arg.name
+        if u.arg is not None: name = u.arg.function_name
         continue
       if u.op in (Ops.DEFINE_GLOBAL, Ops.DEFINE_VAR):
         r[u] = f"%data{u.arg}" if u.op is Ops.DEFINE_GLOBAL else f"%{u.arg[0]}"


@@ -177,7 +177,7 @@ class PTXRenderer(Renderer):
     name = "test"
     for u in uops:
       if u.op is Ops.SINK:
-        if u.arg is not None: name = u.arg.name
+        if u.arg is not None: name = u.arg.function_name
         continue
       if u.op is Ops.VECTORIZE:
         r[u] = [cast(str,r[x]) for x in u.src]


@@ -6,7 +6,7 @@ from tinygrad.uop import Ops, GroupOp
 from tinygrad.uop.mathtraits import MathTrait
 from tinygrad.dtype import ConstType, ImageDType, dtypes, DType, truncate
 from tinygrad.helpers import ContextVar, all_int, prod, getenv, all_same, Context, partition, temp, unwrap, T, argfix, Metadata, flatten
-from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put
+from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name
 if TYPE_CHECKING:
   from tinygrad.shape.shapetracker import ShapeTracker
   from tinygrad.device import Buffer, MultiBuffer
@@ -519,6 +519,8 @@ class KernelInfo:
   upcasted: int = 0  # count that are upcasted (this is remapping RANGE to UNROLL)
   dont_use_locals: bool = False  # don't use local indexing
   applied_opts: tuple = tuple()
+  @property
+  def function_name(self): return to_function_name(self.name)
 
 # ******** ops in python ********
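The effect of the split: KernelInfo.name keeps the pretty, possibly ANSI-colored name for DEBUG output, while renderers call the new function_name property to get a sanitized identifier. A quick illustrative sketch (the colored name here is made up):

```python
from tinygrad.uop.ops import KernelInfo

# "\x1b[34m...\x1b[0m" is ANSI blue, as used in tinygrad's pretty kernel names
ki = KernelInfo(name="r_\x1b[34m16\x1b[0m_4")
print(repr(ki.name))     # escape codes preserved for terminal display
print(ki.function_name)  # to_function_name strips ANSI, encodes non-identifier chars -> e.g. "r_16_4"
```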
# ******** ops in python ******** # ******** ops in python ********