diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 50a202ebe2..86c582afc2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,12 @@ repos: - repo: local hooks: + - id: ruff + name: ruff + entry: ruff . + language: system + always_run: true + pass_filenames: false - id: docs name: docs entry: python3 docs/abstractions.py @@ -15,19 +21,13 @@ repos: pass_filenames: false - id: mypy name: mypy - entry: mypy tinygrad/ extra/helpers.py # --warn-return-any + entry: mypy tinygrad/ extra/helpers.py language: system always_run: true pass_filenames: false - id: tests name: subset of (CPU) tests - entry: env CPU=1 pytest test/unit/ test/test_ops.py test/test_dtype.py test/test_schedule.py - language: system - always_run: true - pass_filenames: false - - id: pylint - name: pylint - entry: pylint tinygrad/ + entry: env CPU=1 pytest test/unit/ test/test_ops.py test/test_dtype.py test/test_schedule.py test/test_custom_function.py test/test_assign.py test/test_symbolic_shapetracker.py language: system always_run: true pass_filenames: false diff --git a/examples/handcode_resnet50_opt.py b/examples/handcode_resnet50_opt.py index c81f43118d..04fec585d5 100644 --- a/examples/handcode_resnet50_opt.py +++ b/examples/handcode_resnet50_opt.py @@ -6,7 +6,7 @@ from tinygrad.codegen.kernel import LinearizerOptions from tinygrad.codegen.linearizer import Linearizer from tinygrad.runtime.ops_metal import renderer, MetalProgram, RawMetalBuffer from tinygrad.helpers import ansilen, DEBUG -from extra.utils import print_tree +from tinygrad.graph import print_tree if __name__ == "__main__": mdl = ResNet50() diff --git a/extra/utils.py b/extra/utils.py index 93268b4d19..aa416bfe9d 100644 --- a/extra/utils.py +++ b/extra/utils.py @@ -212,13 +212,3 @@ def get_child(parent, key): else: obj = getattr(obj, k) return obj - -def _tree(lazydata, prefix=""): - if type(lazydata).__name__ == "LazyBuffer": return [f"━━ realized {lazydata.dtype.name} {lazydata.shape}"] if (lazydata.realized) else _tree(lazydata.op, "LB ") - if len(lazydata.src) == 0: return [f"━━ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"] - lines = [f"━┳ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"] - childs = [_tree(c) for c in lazydata.src[:]] - for c in childs[:-1]: lines += [f" ┣{c[0]}"] + [f" ┃{l}" for l in c[1:]] - return lines + [" ┗"+childs[-1][0]] + [" "+l for l in childs[-1][1:]] - -def print_tree(tensor:Union[Tensor, LazyBuffer]):print("\n".join([f"{str(i).rjust(3)} {s}" for i,s in enumerate(_tree(tensor if not isinstance(tensor, Tensor) else tensor.lazydata))])) diff --git a/test/test_schedule.py b/test/test_schedule.py index 3e97f1bc07..80702fdbf6 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -8,7 +8,7 @@ from tinygrad.tensor import Tensor from tinygrad.ops import LoadOps, Device, Compiled from tinygrad.helpers import DEBUG, dtypes from tinygrad.codegen.linearizer import Linearizer -from tinygrad.graph import log_schedule_item +from tinygrad.graph import log_schedule_item, print_tree from tinygrad import nn def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=None, filter_loadops=True): @@ -23,7 +23,6 @@ def check_schedule(t:Tensor, allowed:int, to_prerealize:Optional[List[Tensor]]=N if filter_loadops: sched = [s for s in sched if s[0].op not in LoadOps] if len(sched) != allowed: print(f"SCHEDULE ISSUE, expecting {allowed} got {len(sched)}") if len(sched) != allowed or DEBUG >= 3: - from extra.utils import print_tree for i, s in enumerate(sched): print("op", i) print_tree(s[0]) diff --git a/test/test_winograd.py b/test/test_winograd.py index b062946a1f..ac8c338d1a 100644 --- a/test/test_winograd.py +++ b/test/test_winograd.py @@ -3,6 +3,7 @@ from tinygrad.helpers import Timing from tinygrad.tensor import Tensor from tinygrad.ops import LoadOps from tinygrad.codegen.linearizer import Linearizer +from test.test_net_speed import start_profile, stop_profile class TestWinograd(unittest.TestCase): def setUp(self): @@ -28,5 +29,12 @@ class TestWinograd(unittest.TestCase): l.hand_coded_optimizations() l.linearize() + def test_profile(self): + x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize() + pr = start_profile() + out = Tensor.conv2d(x,w).realize() + stop_profile(pr, sort='time') + out.numpy() + if __name__ == '__main__': unittest.main(verbosity=2) \ No newline at end of file diff --git a/tinygrad/graph.py b/tinygrad/graph.py index b93324fd63..f9264e4bf2 100644 --- a/tinygrad/graph.py +++ b/tinygrad/graph.py @@ -72,3 +72,13 @@ def log_schedule_item(iop: LazyOp, ret: 'LazyBuffer', inp: Tuple['LazyBuffer', . G.nodes[nm(ret)]['fillcolor'] = top_colors[optype] G.nodes[nm(ret)]['color'] = 'black' G.nodes[nm(ret)]['style'] = 'filled' + +def _tree(lazydata, prefix=""): + if type(lazydata).__name__ == "LazyBuffer": return [f"━━ realized {lazydata.dtype.name} {lazydata.shape}"] if (lazydata.realized) else _tree(lazydata.op, "LB ") + if len(lazydata.src) == 0: return [f"━━ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"] + lines = [f"━┳ {prefix}{lazydata.op.name} {lazydata.arg if lazydata.arg else ''}"] + childs = [_tree(c) for c in lazydata.src[:]] + for c in childs[:-1]: lines += [f" ┣{c[0]}"] + [f" ┃{l}" for l in c[1:]] + return lines + [" ┗"+childs[-1][0]] + [" "+l for l in childs[-1][1:]] + +def print_tree(lazydata:LazyOp): print("\n".join([f"{str(i).rjust(3)} {s}" for i,s in enumerate(_tree(lazydata))])) \ No newline at end of file diff --git a/tinygrad/realize.py b/tinygrad/realize.py index 427cc1ed1a..5e21d1ec07 100644 --- a/tinygrad/realize.py +++ b/tinygrad/realize.py @@ -1,15 +1,13 @@ from typing import List, Tuple, cast, Dict, Callable import numpy as np from tinygrad.ops import LazyOp, LoadOps, Device, UnaryOps, BufferOps, MemBuffer, get_lazyop_info -from tinygrad.graph import log_schedule_item +from tinygrad.graph import log_schedule_item, print_tree from tinygrad.lazy import LazyBuffer from tinygrad.helpers import DEBUG, prod, all_int, getenv, IMAGE, ImageDType, dtypes from tinygrad.runtime.lib import RawBufferMapped, RawBufferTransfer from tinygrad.runtime.ops_disk import RawDiskBuffer -P2P = getenv("P2P", 0) - def fix_schedule_for_images(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]]): # this is the fundamental fix, find unwritable or unreadable images and convert them to normal float32 (TODO: should it be float16?) for op,out,buffers in schedule: @@ -41,6 +39,7 @@ def fix_schedule_for_images(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBu fixed_schedule.append((op, out, buffers)) return fixed_schedule +# *** this is where things happen *** def run_schedule(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]]]): # HACK: images can be not usable due to shape @@ -51,9 +50,7 @@ def run_schedule(schedule:List[Tuple[LazyOp, LazyBuffer, Tuple[LazyBuffer, ...]] op,out,buffers = schedule.pop(0) log_schedule_item(op, out, buffers) assert all(x.realized for x in buffers), "can't run schedule, some buffers aren't realized" - if DEBUG >= 3: - from extra.utils import print_tree # type: ignore - print_tree(op) + if DEBUG >= 3: print_tree(op) if op.op in LoadOps: # confirm the LoadOps are contiguous and in order for i,s in enumerate(op.src): assert isinstance(s, LazyOp) and s.op == BufferOps.MEM and s.arg.idx == i+1 and s.arg.st.contiguous, f"bad LoadOps src {i}: {s}" @@ -89,7 +86,7 @@ def _realize_from(buffer: LazyBuffer, src: LazyBuffer) -> None: assert all_int(buffer.shape), "does not support symbolic shape" buffer.realized = Device[buffer.device].buffer(prod(buffer.shape), buffer.dtype, **buffer._device_extra_args()) src.realized.readinto(cast(RawBufferMapped, buffer.realized)._buffer()) - elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and P2P >= 1: + elif isinstance(src.realized, RawBufferTransfer) and issubclass(Device[buffer.device].buffer, RawBufferTransfer) and getenv("P2P", 0) >= 1: buffer.realized = cast(RawBufferTransfer, Device[buffer.device].buffer).transfer(src.realized, buffer.shape, buffer.dtype, **buffer._device_extra_args()) else: # TODO: schedule this as FROM to go to CPU, and a FROM to go to device