move print tree into graph (#2003)

* move print tree into graph
* add winograd profiling test
* change pre-commit to run ruff first
2026-01-08 22:48:25 -05:00 · 2023-10-07 04:39:21 -07:00
parent 2114dc13d1
commit f54959e5cd
7 changed files with 32 additions and 28 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,12 @@
 repos:
  - repo: local
    hooks:
+      - id: ruff
+        name: ruff
+        entry: ruff .
+        language: system
+        always_run: true
+        pass_filenames: false
      - id: docs
        name: docs
        entry: python3 docs/abstractions.py
@@ -15,19 +21,13 @@ repos:
        pass_filenames: false
      - id: mypy
        name: mypy
-        entry: mypy tinygrad/ extra/helpers.py # --warn-return-any
+        entry: mypy tinygrad/ extra/helpers.py
        language: system
        always_run: true
        pass_filenames: false
      - id: tests
        name: subset of (CPU) tests
-        entry: env CPU=1 pytest test/unit/ test/test_ops.py test/test_dtype.py test/test_schedule.py
-        language: system
-        always_run: true
-        pass_filenames: false
-      - id: pylint
-        name: pylint
-        entry: pylint tinygrad/
+        entry: env CPU=1 pytest test/unit/ test/test_ops.py test/test_dtype.py test/test_schedule.py test/test_custom_function.py test/test_assign.py test/test_symbolic_shapetracker.py
        language: system
        always_run: true
        pass_filenames: false
--- a/examples/handcode_resnet50_opt.py
+++ b/examples/handcode_resnet50_opt.py
@@ -6,7 +6,7 @@ from tinygrad.codegen.kernel import LinearizerOptions
 from tinygrad.codegen.linearizer import Linearizer
 from tinygrad.runtime.ops_metal import renderer, MetalProgram, RawMetalBuffer
 from tinygrad.helpers import ansilen, DEBUG
-from extra.utils import print_tree
+from tinygrad.graph import print_tree

 if __name__ == "__main__":
  mdl = ResNet50()
--- a/extra/utils.py
+++ b/extra/utils.py
@@ -212,13 +212,3 @@ def get_child(parent, key):
    else:
      obj = getattr(obj, k)
  return obj
-
-def _tree(lazydata, prefix=""):
-  if type(lazydata).__name__ == "LazyBuffer": return [f"━━ realized {lazydata.dtype.name} {lazydata.shape}"] if (lazydata.realized) else _tree(lazydata.op, "LB ")
-  if len(lazydata.src) == 0: return [f"━━ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]
-  lines = [f"━┳ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]
-  childs = [_tree(c) for c in lazydata.src[:]]
-  for c in childs[:-1]: lines += [f" ┣{c[0]}"] + [f" ┃{l}" for l in c[1:]]
-  return lines + [" ┗"+childs[-1][0]] + ["  "+l for l in childs[-1][1:]]
-
-def print_tree(tensor:Union[Tensor, LazyBuffer]):print("\n".join([f"{str(i).rjust(3)} {s}" for i,s in enumerate(_tree(tensor if not isinstance(tensor, Tensor) else tensor.lazydata))]))
--- a/test/test_schedule.py
+++ b/test/test_schedule.py
@@ -8,7 +8,7 @@ from tinygrad.tensor import Tensor
 from tinygrad.ops import LoadOps, Device, Compiled
 from tinygrad.helpers import DEBUG, dtypes
 from tinygrad.codegen.linearizer import Linearizer
-from tinygrad.graph import log_schedule_item
+from tinygrad.graph import log_schedule_item, print_tree
 from tinygrad import nn

 def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=None, filter_loadops=True):
@@ -23,7 +23,6 @@ def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=N
  if filter_loadops: sched = [s for s in sched if s[0].op not in LoadOps]
  if len(sched) != allowed: print(f"SCHEDULE ISSUE, expecting {allowed} got {len(sched)}")
  if len(sched) != allowed or DEBUG >= 3:
-    from extra.utils import print_tree
    for i, s in enumerate(sched):
      print("op", i)
      print_tree(s[0])
--- a/test/test_winograd.py
+++ b/test/test_winograd.py
@@ -3,6 +3,7 @@ from tinygrad.helpers import Timing
 from tinygrad.tensor import Tensor
 from tinygrad.ops import LoadOps
 from tinygrad.codegen.linearizer import Linearizer
+from test.test_net_speed import start_profile, stop_profile

 class TestWinograd(unittest.TestCase):
  def setUp(self):
@@ -28,5 +29,12 @@ class TestWinograd(unittest.TestCase):
        l.hand_coded_optimizations()
        l.linearize()

+  def test_profile(self):  # smoke test: runs one small conv2d between start_profile/stop_profile; makes no assertions
+    x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize()  # inputs are realized before profiling starts
+    pr = start_profile()
+    out = Tensor.conv2d(x,w).realize()  # the only work done between start_profile and stop_profile
+    stop_profile(pr, sort='time')  # helper from test.test_net_speed; presumably reports stats ordered by time -- confirm there
+    out.numpy()  # runs after stop_profile, so it is outside the profiled span
+
 if __name__ == '__main__':
  unittest.main(verbosity=2)
--- a/tinygrad/graph.py
+++ b/tinygrad/graph.py
@@ -72,3 +72,13 @@ def log_schedule_item(iop: LazyOp, ret: 'LazyBuffer', inp: Tuple['LazyBuffer', .
    G.nodes[nm(ret)]['fillcolor'] = top_colors[optype]
    G.nodes[nm(ret)]['color'] = 'black'
    G.nodes[nm(ret)]['style'] = 'filled'
+
+def _tree(lazydata, prefix=""):  # returns the tree rooted at lazydata as a list of box-drawing text lines (first line is the root)
+  if type(lazydata).__name__ == "LazyBuffer": return [f"━━ realized {lazydata.dtype.name} {lazydata.shape}"] if (lazydata.realized) else _tree(lazydata.op, "LB ")  # matched by class name; realized -> one leaf line, else descend into .op tagged "LB "
+  if len(lazydata.src) == 0: return [f"━━ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]  # no sources: single leaf line (a falsy arg prints as empty)
+  lines = [f"━┳ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]  # node with sources: header line, children are appended below
+  childs = [_tree(c) for c in lazydata.src[:]]  # prefix is not forwarded; each child starts from the default ""
+  for c in childs[:-1]: lines += [f" ┣{c[0]}"] + [f" ┃{l}" for l in c[1:]]  # every child but the last: branch with ┣ and continue the vertical bar with ┃
+  return lines + [" ┗"+childs[-1][0]] + ["  "+l for l in childs[-1][1:]]  # last child: close the branch with ┗ and pad its remaining lines with spaces
+
+def print_tree(lazydata:LazyOp): print("\n".join([f"{str(i).rjust(3)} {s}" for i,s in enumerate(_tree(lazydata))]))  # prints the _tree lines, each prefixed with its index right-justified to width 3
--- a/tinygrad/realize.py
+++ b/tinygrad/realize.py
@@ -1,15 +1,13 @@
 from typing import List, Tuple, cast, Dict, Callable
 import numpy as np
 from tinygrad.ops import LazyOp, LoadOps, Device, UnaryOps, BufferOps, MemBuffer, get_lazyop_info
-from tinygrad.graph import log_schedule_item
+from tinygrad.graph import log_schedule_item, print_tree
 from tinygrad.lazy import LazyBuffer
 from tinygrad.helpers import DEBUG, prod, all_int, getenv, IMAGE, ImageDType, dtypes

 from tinygrad.runtime.lib import RawBufferMapped, RawBufferTransfer
 from tinygrad.runtime.ops_disk import RawDiskBuffer

-P2P = getenv("P2P", 0)
-
 def fix_schedule_for_images(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]]):
  # this is the fundamental fix, find unwritable or unreadable images and convert them to normal float32 (TODO: should it be float16?)
  for op,out,buffers in schedule:
@@ -41,6 +39,7 @@ def fix_schedule_for_images(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBu
    fixed_schedule.append((op, out, buffers))
  return fixed_schedule

+# *** this is where things happen ***

 def run_schedule(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]]):
  # HACK: images can be not usable due to shape
@@ -51,9 +50,7 @@ def run_schedule(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]
    op,out,buffers = schedule.pop(0)
    log_schedule_item(op, out, buffers)
    assert all(x.realized for x in buffers), "can't run schedule, some buffers aren't realized"
-    if DEBUG >= 3:
-      from extra.utils import print_tree   # type: ignore
-      print_tree(op)
+    if DEBUG >= 3: print_tree(op)
    if op.op in LoadOps:
      # confirm the LoadOps are contiguous and in order
      for i,s in enumerate(op.src): assert isinstance(s, LazyOp) and s.op == BufferOps.MEM and s.arg.idx == i+1 and s.arg.st.contiguous, f"bad LoadOps src {i}: {s}"
@@ -89,7 +86,7 @@ def _realize_from(buffer: LazyBuffer, src: LazyBuffer) -> None:
    assert all_int(buffer.shape), "does not support symbolic shape"
    buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args())
    src.realized.readinto(cast(RawBufferMapped, buffer.realized)._buffer())
-  elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and P2P >= 1:
+  elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and getenv("P2P", 0) >= 1:
    buffer.realized = cast(RawBufferTransfer, Device[buffer.device].buffer).transfer(src.realized, buffer.shape, buffer.dtype, **buffer._device_extra_args())
  else:
    # TODO: schedule this as FROM to go to CPU, and a FROM to go to device