From 3e40211e45704a7195a04420bd9ebf6ca83f3fa6 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Thu, 11 Jul 2024 10:48:45 -0700
Subject: [PATCH] add UOP_IS_SYMBOLIC [run_process_replay] [no_assert] (#5386)

* cleanup a few things in uops [run_process_replay] [no_assert]

* add optional UOP_IS_SYMBOLIC
---
 test/test_multitensor.py     |  2 +-
 tinygrad/codegen/lowerer.py  | 49 ++++++++++++++++++++++++++----------
 tinygrad/codegen/uopgraph.py |  3 ++-
 tinygrad/engine/realize.py   |  3 ---
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/test/test_multitensor.py b/test/test_multitensor.py
index 70b670d2c1..186cacfdd3 100644
--- a/test/test_multitensor.py
+++ b/test/test_multitensor.py
@@ -329,7 +329,7 @@ class TestMultiTensor(unittest.TestCase):
       shard_output_np = shard_output.numpy()
       np.testing.assert_allclose(real_output, shard_output_np, atol=1e-6, rtol=1e-6)
 
-  @unittest.skipIf(CI and Device.DEFAULT in ("CUDA", "NV"), "slow")
+  @unittest.skipIf(CI and Device.DEFAULT in ("CUDA", "NV", "LLVM"), "slow, and flaky on LLVM")
   def test_data_parallel_resnet_train_step(self):
     import sys, pathlib
     sys.path.append((pathlib.Path(__file__).parent.parent / "extra" / "models").as_posix())
diff --git a/tinygrad/codegen/lowerer.py b/tinygrad/codegen/lowerer.py
index e8678ba104..e9a13f7fc9 100644
--- a/tinygrad/codegen/lowerer.py
+++ b/tinygrad/codegen/lowerer.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 from typing import List, Tuple, cast, Optional, Any, Dict
 import functools
 from tinygrad.codegen.kernel import Kernel
-from tinygrad.shape.shapetracker import ShapeTracker
+from tinygrad.shape.shapetracker import ShapeTracker, View
 from tinygrad.dtype import dtypes, PtrDType, ImageDType, DType
 from tinygrad.ops import BufferOps, LazyOp, TernaryOps, ReduceOps, UnaryOps, get_lazyop_info
 from tinygrad.codegen.uops import UOp, flops_mem, UOps
@@ -10,12 +10,9 @@ from tinygrad.codegen.uopgraph import UOpGraph
 from tinygrad.renderer import Program
 from tinygrad.helpers import to_function_name, DEBUG, getenv, prod, diskcache_put, ContextVar
 
-# TODO: this needs to be replaced, there shouldn't be variables in the shapetracker
-def variable_to_uop(x, ctx=None) -> UOp:
-  if isinstance(x, int): return UOp.const(dtypes.int32, x)
-  return x.render(render_ops, ctx)
-
+# TODO: this needs to be replaced, there shouldn't be variables in the shapetracker, only ints and UOps
 from tinygrad.shape.symbolic import Variable, NumNode, SumNode, MulNode, DivNode, ModNode, LtNode, AndNode
+def variable_to_uop(x, ctx=None) -> UOp: return UOp.const(dtypes.int32, x) if isinstance(x, int) else x.render(render_ops, ctx)
 render_ops: Any = { NumNode: lambda self, ops, ctx: UOp.const(dtypes.int, self.b),
                     MulNode: lambda self, ops, ctx: self.a.render(ops, ctx)*variable_to_uop(self.b, ctx),
                     DivNode: lambda self, ops, ctx: self.a.render(ops, ctx)//variable_to_uop(self.b, ctx),
@@ -25,12 +22,38 @@ render_ops: Any = { NumNode: lambda self, ops, ctx: UOp.const(dtypes.int, self.b
                     SumNode: lambda self,ops,ctx: functools.reduce(lambda a,b: a+b.render(ops, ctx), self.nodes[1:], self.nodes[0].render(ops,ctx)),
                     AndNode: lambda self,ops,ctx: functools.reduce(lambda a,b: a*b.render(ops, ctx), self.nodes[1:], self.nodes[0].render(ops,ctx)) }
 
-# TODO: change this once UOps is ready to replace symbolic
-def st_to_uops(st:ShapeTracker, idxs:List[UOp]) -> Tuple[UOp, UOp]:
-  fake_idxs = [Variable(f"__idx{i}", 0, s-1) for i,s in enumerate(st.shape)]
-  idx, valid = st.expr_idxs(fake_idxs)
-  ctx = dict(zip(fake_idxs, idxs))
-  return idx.render(render_ops, ctx), valid.render(render_ops, ctx).cast(dtypes.bool)
+if getenv("UOP_IS_SYMBOLIC"):
+  # TODO: change this once UOps is ready to replace symbolic. note: this doesn't work for variable shapetrackers now
+  def _uop_view(view:View, idxs:List[UOp], vexpr:UOp) -> Tuple[UOp, UOp]:
+    # TODO: dtypes.realint
+    iexpr = variable_to_uop(view.offset)
+    for idx,sh,st,m in zip(idxs, view.shape, view.strides, view.mask if view.mask is not None else [None]*len(view.shape)):
+      if sh != 1 and st != 0: iexpr = iexpr + idx*variable_to_uop(st)
+      if m is not None:
+        if m[0] != 0: vexpr = vexpr * idx.ge(variable_to_uop(m[0]))
+        if m[1] != sh: vexpr = vexpr * idx.lt(variable_to_uop(m[1]))
+    return iexpr, vexpr
+
+  def st_to_uops(st:ShapeTracker, idxs:List[UOp]) -> Tuple[UOp, UOp]:
+    idx, valid = _uop_view(st.views[-1], idxs, UOp.const(dtypes.bool, True))
+    for view in reversed(st.views[0:-1]):
+      view = view.minify()
+      acc, idxs = 1, []
+      for _d in reversed(view.shape):
+        d = variable_to_uop(_d)
+        idxs.append((idx//acc)%d)
+        acc *= d
+      idx, valid = _uop_view(view, idxs[::-1], valid)
+    return idx, valid
+else:
+  def st_to_uops(st:ShapeTracker, idxs:List[UOp]) -> Tuple[UOp, UOp]:
+    fake_idxs = [Variable(f"__idx{i}", 0, s-1) for i,s in enumerate(st.shape)]
+    idx, valid = st.expr_idxs(fake_idxs)
+    ctx = dict(zip(fake_idxs, idxs))
+    uidx, uvalid = idx.render(render_ops, ctx), valid.render(render_ops, ctx)
+    if uvalid.op is UOps.CONST: uvalid = UOp.const(dtypes.bool, uvalid.arg)
+    assert uvalid.dtype == dtypes.bool
+    return uidx, uvalid
 
 def get_grouped_dims(prefix, start_dim, local_dims, maxdim:int=0) -> Tuple[List[UOp], List[UOp]]:
   local_idxs = loop_local_idxs = [UOp(UOps.SPECIAL, dtypes.int32, (), (i, f"{prefix}{start_dim+i}", s)) for i,s in enumerate((prod(local_dims[:-(maxdim-1)]),) + local_dims[-(maxdim-1):] if len(local_dims) > maxdim else local_dims)]  # noqa: E501
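The arithmetic _uop_view builds as UOps is plain strided indexing: index = offset + sum(idx_i * stride_i) over dimensions that are neither size-1 nor stride-0, plus a validity predicate accumulated from the optional per-dimension mask (valid iff m[0] <= idx < m[1]); earlier views of a multi-view ShapeTracker are handled by unflattening the running index with (idx//acc)%d per dimension. A minimal standalone sketch of that arithmetic on plain Python ints (illustration only, not tinygrad code; view_index is a made-up name):

# Sketch of the index/validity arithmetic from _uop_view, on plain ints.
# tinygrad builds the same expressions as UOps over symbolic indices.
from typing import List, Optional, Tuple

def view_index(idxs:List[int], shape:Tuple[int, ...], strides:Tuple[int, ...],
               offset:int, mask:Optional[Tuple[Tuple[int, int], ...]]) -> Tuple[int, bool]:
  iexpr, vexpr = offset, True
  for idx, sh, st, m in zip(idxs, shape, strides, mask if mask is not None else [None]*len(shape)):
    if sh != 1 and st != 0: iexpr += idx*st                 # size-1/stride-0 dims contribute nothing
    if m is not None: vexpr = vexpr and m[0] <= idx < m[1]  # per-dim mask bounds check
  return iexpr, vexpr

# 2x3 row-major view with the last column masked off:
print(view_index([1, 2], (2, 3), (3, 1), 0, ((0, 2), (0, 2))))  # -> (5, False)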
@@ -88,7 +111,7 @@ class Lowerer(Kernel):
 
   def linearize(self) -> Lowerer:
     modified_ast, ki = self.get_optimized_ast()
 
-    if DEBUG >= 4:
+    if DEBUG >= 3:
       from tinygrad.engine.graph import print_tree
       for mast in modified_ast: print_tree(mast)
diff --git a/tinygrad/codegen/uopgraph.py b/tinygrad/codegen/uopgraph.py
index 28dd0f1f25..df6bf65b34 100644
--- a/tinygrad/codegen/uopgraph.py
+++ b/tinygrad/codegen/uopgraph.py
@@ -383,7 +383,6 @@ constant_folder = PatternMatcher([
   # ** self folding **
   (-(-UOp.var('x')), lambda x: x),    # -(-x) -> x
   (UOp.var('x') + 0, lambda x: x),    # x+0 -> x
-  (UOp.var('x') - 0, lambda x: x),    # x-0 -> x
   (UOp.var('x') * 1, lambda x: x),    # x*1 -> x
   (UOp.var('x') * -1, lambda x: -x),  # x*-1 -> -x
   (UOp.var('x') // UOp.var('x'), lambda x: UOp.const(x.dtype, 1)),  # x//x -> 1
@@ -406,6 +405,8 @@ constant_folder = PatternMatcher([
   # *** rules from symbolic ***
   # two stage mul, (x*c1)*c2 = x*(c1*c2)
   ((UOp.var("x") * UOp.cvar("c1")) * UOp.cvar("c2"), lambda x,c1,c2: x*UOp.const(x.dtype, exec_alu(BinaryOps.MUL, x.dtype, [c1.arg, c2.arg]))),
+  # -(x+y) -> -x + -y
+  #(-(UOp.var("x") + UOp.var("y")), lambda x,y: (-x)+(-y)),
   # x%1 -> 0
   (UOp.var("x") % UOp.const(None, 1), lambda x: UOp.const(x.dtype, 0)),
   # (x*c0)+(x*c1) -> x*(c0+c1)
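The "two stage mul" rule above rewrites (x*c1)*c2 into x*(c1*c2), so chained constant multipliers collapse into a single constant. A toy version of that fold on a made-up tuple-based expression tree (illustration only; tinygrad's PatternMatcher and UOp types work differently):

# Toy version of the "two stage mul" fold: ("mul", a, b) is a multiply node,
# ints are constants, strings are variables.
def fold_two_stage_mul(expr):
  if (isinstance(expr, tuple) and expr[0] == "mul" and isinstance(expr[2], int)
      and isinstance(expr[1], tuple) and expr[1][0] == "mul" and isinstance(expr[1][2], int)):
    x, c1, c2 = expr[1][1], expr[1][2], expr[2]
    return ("mul", x, c1*c2)  # (x*c1)*c2 -> x*(c1*c2)
  return expr

print(fold_two_stage_mul(("mul", ("mul", "x", 3), 4)))  # -> ('mul', 'x', 12)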
diff --git a/tinygrad/engine/realize.py b/tinygrad/engine/realize.py
index 829d368092..f98ae9b737 100644
--- a/tinygrad/engine/realize.py
+++ b/tinygrad/engine/realize.py
@@ -13,9 +13,6 @@ from tinygrad.engine.schedule import ScheduleItem
 
 logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
 def get_linearizer(renderer:Renderer, ast:Tuple[LazyOp, ...]) -> Linearizer:
-  if DEBUG >= 3:
-    from tinygrad.engine.graph import print_tree
-    for op in ast: print_tree(op)
   k = Linearizer(*ast, opts=renderer)
   k.required_optimizations()
   if not NOOPT:
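The new lowerer path is opt-in: lowerer.py binds one of two st_to_uops implementations at import time based on getenv("UOP_IS_SYMBOLIC"), so setting UOP_IS_SYMBOLIC=1 in the environment switches the whole process to the direct View-walking code. A minimal mirror of that dispatch pattern (os.getenv stands in for tinygrad.helpers.getenv here, and st_to_uops_demo is a made-up stand-in function):

# Import-time dispatch keyed on an environment variable, as the patch does.
import os

if int(os.getenv("UOP_IS_SYMBOLIC", "0")):
  def st_to_uops_demo() -> str: return "direct View walk (new opt-in path)"
else:
  def st_to_uops_demo() -> str: return "render symbolic nodes (default path)"

print(st_to_uops_demo())  # run with UOP_IS_SYMBOLIC=1 to take the new path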