mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-08 22:48:25 -05:00
move print tree into graph (#2003)
* move print tree into graph
* add winograd profiling test
* change pre-commit to run ruff first
This commit is contained in:
@@ -1,6 +1,12 @@
|
||||
repos:
|
||||
- repo: local
|
||||
hooks:
|
||||
- id: ruff
|
||||
name: ruff
|
||||
entry: ruff .
|
||||
language: system
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
- id: docs
|
||||
name: docs
|
||||
entry: python3 docs/abstractions.py
|
||||
@@ -15,19 +21,13 @@ repos:
|
||||
pass_filenames: false
|
||||
- id: mypy
|
||||
name: mypy
|
||||
entry: mypy tinygrad/ extra/helpers.py # --warn-return-any
|
||||
entry: mypy tinygrad/ extra/helpers.py
|
||||
language: system
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
- id: tests
|
||||
name: subset of (CPU) tests
|
||||
entry: env CPU=1 pytest test/unit/ test/test_ops.py test/test_dtype.py test/test_schedule.py
|
||||
language: system
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
- id: pylint
|
||||
name: pylint
|
||||
entry: pylint tinygrad/
|
||||
entry: env CPU=1 pytest test/unit/ test/test_ops.py test/test_dtype.py test/test_schedule.py test/test_custom_function.py test/test_assign.py test/test_symbolic_shapetracker.py
|
||||
language: system
|
||||
always_run: true
|
||||
pass_filenames: false
|
||||
|
||||
@@ -6,7 +6,7 @@ from tinygrad.codegen.kernel import LinearizerOptions
|
||||
from tinygrad.codegen.linearizer import Linearizer
|
||||
from tinygrad.runtime.ops_metal import renderer, MetalProgram, RawMetalBuffer
|
||||
from tinygrad.helpers import ansilen, DEBUG
|
||||
from extra.utils import print_tree
|
||||
from tinygrad.graph import print_tree
|
||||
|
||||
if __name__ == "__main__":
|
||||
mdl = ResNet50()
|
||||
|
||||
@@ -212,13 +212,3 @@ def get_child(parent, key):
|
||||
else:
|
||||
obj = getattr(obj, k)
|
||||
return obj
|
||||
|
||||
def _tree(lazydata, prefix=""):
|
||||
if type(lazydata).__name__ == "LazyBuffer": return [f"━━ realized {lazydata.dtype.name} {lazydata.shape}"] if (lazydata.realized) else _tree(lazydata.op, "LB ")
|
||||
if len(lazydata.src) == 0: return [f"━━ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]
|
||||
lines = [f"━┳ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]
|
||||
childs = [_tree(c) for c in lazydata.src[:]]
|
||||
for c in childs[:-1]: lines += [f" ┣{c[0]}"] + [f" ┃{l}" for l in c[1:]]
|
||||
return lines + [" ┗"+childs[-1][0]] + [" "+l for l in childs[-1][1:]]
|
||||
|
||||
def print_tree(tensor:Union[Tensor, LazyBuffer]):
  """Print the tree produced by `_tree`, one line per node, each numbered.

  A `Tensor` is unwrapped to its `lazydata` first; anything else is passed to
  `_tree` unchanged. Line numbers start at 0 and are right-justified to a
  width of three characters.
  """
  root = tensor.lazydata if isinstance(tensor, Tensor) else tensor
  numbered = (f"{str(idx).rjust(3)} {row}" for idx, row in enumerate(_tree(root)))
  print("\n".join(numbered))
|
||||
|
||||
@@ -8,7 +8,7 @@ from tinygrad.tensor import Tensor
|
||||
from tinygrad.ops import LoadOps, Device, Compiled
|
||||
from tinygrad.helpers import DEBUG, dtypes
|
||||
from tinygrad.codegen.linearizer import Linearizer
|
||||
from tinygrad.graph import log_schedule_item
|
||||
from tinygrad.graph import log_schedule_item, print_tree
|
||||
from tinygrad import nn
|
||||
|
||||
def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=None, filter_loadops=True):
|
||||
@@ -23,7 +23,6 @@ def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=N
|
||||
if filter_loadops: sched = [s for s in sched if s[0].op not in LoadOps]
|
||||
if len(sched) != allowed: print(f"SCHEDULE ISSUE, expecting {allowed} got {len(sched)}")
|
||||
if len(sched) != allowed or DEBUG >= 3:
|
||||
from extra.utils import print_tree
|
||||
for i, s in enumerate(sched):
|
||||
print("op", i)
|
||||
print_tree(s[0])
|
||||
|
||||
@@ -3,6 +3,7 @@ from tinygrad.helpers import Timing
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.ops import LoadOps
|
||||
from tinygrad.codegen.linearizer import Linearizer
|
||||
from test.test_net_speed import start_profile, stop_profile
|
||||
|
||||
class TestWinograd(unittest.TestCase):
|
||||
def setUp(self):
|
||||
@@ -28,5 +29,12 @@ class TestWinograd(unittest.TestCase):
|
||||
l.hand_coded_optimizations()
|
||||
l.linearize()
|
||||
|
||||
def test_profile(self):
  # inputs are created and realized before profiling starts
  inp = Tensor.rand(1,4,9,9).realize()
  weight = Tensor.rand(4,4,3,3).realize()
  # only the conv2d realize sits between start_profile and stop_profile
  profiler = start_profile()
  out = Tensor.conv2d(inp, weight).realize()
  stop_profile(profiler, sort='time')
  # the result is converted to numpy after profiling has stopped
  out.numpy()
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(verbosity=2)
|
||||
@@ -72,3 +72,13 @@ def log_schedule_item(iop: LazyOp, ret: 'LazyBuffer', inp: Tuple['LazyBuffer', .
|
||||
G.nodes[nm(ret)]['fillcolor'] = top_colors[optype]
|
||||
G.nodes[nm(ret)]['color'] = 'black'
|
||||
G.nodes[nm(ret)]['style'] = 'filled'
|
||||
|
||||
def _tree(lazydata, prefix=""):
|
||||
if type(lazydata).__name__ == "LazyBuffer": return [f"━━ realized {lazydata.dtype.name} {lazydata.shape}"] if (lazydata.realized) else _tree(lazydata.op, "LB ")
|
||||
if len(lazydata.src) == 0: return [f"━━ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]
|
||||
lines = [f"━┳ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"]
|
||||
childs = [_tree(c) for c in lazydata.src[:]]
|
||||
for c in childs[:-1]: lines += [f" ┣{c[0]}"] + [f" ┃{l}" for l in c[1:]]
|
||||
return lines + [" ┗"+childs[-1][0]] + [" "+l for l in childs[-1][1:]]
|
||||
|
||||
def print_tree(lazydata:LazyOp):
  """Print the tree produced by `_tree` for `lazydata`, one numbered line per node.

  Line numbers start at 0 and are right-justified to a width of three
  characters, followed by a space and the tree line.
  """
  numbered = (f"{str(idx).rjust(3)} {row}" for idx, row in enumerate(_tree(lazydata)))
  print("\n".join(numbered))
|
||||
@@ -1,15 +1,13 @@
|
||||
from typing import List, Tuple, cast, Dict, Callable
|
||||
import numpy as np
|
||||
from tinygrad.ops import LazyOp, LoadOps, Device, UnaryOps, BufferOps, MemBuffer, get_lazyop_info
|
||||
from tinygrad.graph import log_schedule_item
|
||||
from tinygrad.graph import log_schedule_item, print_tree
|
||||
from tinygrad.lazy import LazyBuffer
|
||||
from tinygrad.helpers import DEBUG, prod, all_int, getenv, IMAGE, ImageDType, dtypes
|
||||
|
||||
from tinygrad.runtime.lib import RawBufferMapped, RawBufferTransfer
|
||||
from tinygrad.runtime.ops_disk import RawDiskBuffer
|
||||
|
||||
P2P = getenv("P2P", 0)
|
||||
|
||||
def fix_schedule_for_images(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]]):
|
||||
# this is the fundamental fix, find unwritable or unreadable images and convert them to normal float32 (TODO: should it be float16?)
|
||||
for op,out,buffers in schedule:
|
||||
@@ -41,6 +39,7 @@ def fix_schedule_for_images(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBu
|
||||
fixed_schedule.append((op, out, buffers))
|
||||
return fixed_schedule
|
||||
|
||||
# *** this is where things happen ***
|
||||
|
||||
def run_schedule(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]]):
|
||||
# HACK: images can be not usable due to shape
|
||||
@@ -51,9 +50,7 @@ def run_schedule(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]
|
||||
op,out,buffers = schedule.pop(0)
|
||||
log_schedule_item(op, out, buffers)
|
||||
assert all(x.realized for x in buffers), "can't run schedule, some buffers aren't realized"
|
||||
if DEBUG >= 3:
|
||||
from extra.utils import print_tree # type: ignore
|
||||
print_tree(op)
|
||||
if DEBUG >= 3: print_tree(op)
|
||||
if op.op in LoadOps:
|
||||
# confirm the LoadOps are contiguous and in order
|
||||
for i,s in enumerate(op.src): assert isinstance(s, LazyOp) and s.op == BufferOps.MEM and s.arg.idx == i+1 and s.arg.st.contiguous, f"bad LoadOps src {i}: {s}"
|
||||
@@ -89,7 +86,7 @@ def _realize_from(buffer: LazyBuffer, src: LazyBuffer) -> None:
|
||||
assert all_int(buffer.shape), "does not support symbolic shape"
|
||||
buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args())
|
||||
src.realized.readinto(cast(RawBufferMapped, buffer.realized)._buffer())
|
||||
elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and P2P >= 1:
|
||||
elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and getenv("P2P", 0) >= 1:
|
||||
buffer.realized = cast(RawBufferTransfer, Device[buffer.device].buffer).transfer(src.realized, buffer.shape, buffer.dtype, **buffer._device_extra_args())
|
||||
else:
|
||||
# TODO: schedule this as FROM to go to CPU, and a FROM to go to device
|
||||
|
||||
Reference in New Issue
Block a user