mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-10 23:48:01 -05:00
remove pushing contig + fix linearizer bug (#2798)
* remove that logic
* fix test, move LOADs
* fix repeat issue on LLVM
* with_phi
This commit is contained in:
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@@ -184,7 +184,7 @@ jobs:
|
||||
- if: ${{ matrix.task == 'openpilot' }}
|
||||
name: Test openpilot model compile and size
|
||||
run: |
|
||||
DEBUG=2 ALLOWED_KERNEL_COUNT=207 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py
|
||||
DEBUG=2 ALLOWED_KERNEL_COUNT=208 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py
|
||||
#python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000'
|
||||
- if: ${{ matrix.task == 'openpilot' }}
|
||||
name: Test openpilot model correctness (float32)
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1252,6 +1252,10 @@ class TestOps(unittest.TestCase):
|
||||
|
||||
np.testing.assert_allclose(x.repeat((2, 0, 4)).numpy(), Tensor.zeros(8, 0, 12).numpy())
|
||||
|
||||
def test_simple_repeat(self):
|
||||
repeats = [3, 3, 4]
|
||||
helper_test_op([(3, 3)], lambda x: x.repeat(*repeats), lambda x: x.repeat(repeats))
|
||||
|
||||
def test_clip(self):
|
||||
helper_test_op([(45,65)], lambda x: x.clip(-2.3, 1.2), lambda x: x.clip(-2.3, 1.2))
|
||||
|
||||
|
||||
@@ -376,9 +376,16 @@ class Linearizer(Kernel):
|
||||
# store
|
||||
self.global_store(0, global_idxs+local_idxs+fake_reduce_idxs+upcast_idxs, val)
|
||||
|
||||
# get PHI node loop scope, link anything using a DEFINE_ACC to the loop as a "parent"
|
||||
acc_scope: DefaultDict[UOp, List[UOp]] = defaultdict(list)
|
||||
for u in self.uops:
|
||||
if u.uop == UOps.PHI:
|
||||
acc_scope[u.vin[0]] += u.vin[2:]
|
||||
|
||||
# graph helper functions
|
||||
@functools.lru_cache(None)
|
||||
def get_recursive_parents(x:UOp) -> Set[UOp]: return set.union(set(x.vin), *[get_recursive_parents(p) for p in x.vin])
|
||||
def get_recursive_parents(x:UOp, with_phi=False) -> Set[UOp]:
|
||||
return set.union(set(x.vin), *[get_recursive_parents(p, with_phi) for p in x.vin], set(acc_scope[x]) if with_phi else set())
|
||||
|
||||
def get_recursive_children(x:UOp) -> Set[UOp]:
|
||||
deps = set([x])
|
||||
@@ -400,9 +407,9 @@ class Linearizer(Kernel):
|
||||
for u in self.uops:
|
||||
if not loop_stack[-1]: loop_stack[-1].append(u)
|
||||
elif u.uop == UOps.LOOP: loop_stack.append([u])
|
||||
elif u.uop not in [UOps.CONST, UOps.ALU, UOps.CAST]: loop_stack[-1].append(u)
|
||||
elif u.uop not in [UOps.CONST, UOps.ALU, UOps.CAST, UOps.LOAD]: loop_stack[-1].append(u)
|
||||
else:
|
||||
parents = get_recursive_parents(u)
|
||||
parents = get_recursive_parents(u, with_phi=True)
|
||||
for i in reversed(range(len(loop_stack))):
|
||||
# check backwards and put the uop in the first encounter with some dependency
|
||||
if any(x in parents for x in loop_stack[i]) or i == 0:
|
||||
|
||||
@@ -21,8 +21,7 @@ LAZYCACHE = getenv("LAZYCACHE", 1)
|
||||
# TODO: movement ops that only change shape are really nops. treat them as such
|
||||
REMOVE_MOVEMENT_NOPS, MERGE_ELEMENTWISE_INTO_REDUCE, SHUFFLE_MOVEMENT_OPS, MERGE_ELEMENTWISE_OPS = OPT>=1, OPT>=1, OPT>=1, OPT>=1
|
||||
MERGE_ONE_REDUCE_INTO_ELEMENTWISE, SHUFFLE_PAD_OPS = OPT>=2, OPT>=2
|
||||
PUSH_PERMUTES, PUSH_CONTIGUOUS = OPT>=3, OPT>=3
|
||||
PUSH_RESHAPES = OPT>=4
|
||||
PUSH_PERMUTES = OPT>=3
|
||||
|
||||
# **** ast fixing functions ****
|
||||
|
||||
@@ -239,16 +238,6 @@ class LazyBuffer:
|
||||
# get outputs now
|
||||
out_device, out_shape, out_dtype = srcs[0].device, srcs[0].shape, max([x.dtype for x in srcs]) if op != UnaryOps.CAST else cast(Tuple[DType, bool], arg)[0]
|
||||
|
||||
# push all contiguous to the end of BinaryOps
|
||||
if PUSH_CONTIGUOUS and any(not x.realized and x.op.op == LoadOps.CONTIGUOUS and len(x.op.src[0].children) <= 1 for x in srcs):
|
||||
new_srcs: List[LazyBuffer] = []
|
||||
for x in srcs:
|
||||
if not x.realized and x.op.op == LoadOps.CONTIGUOUS and len(x.op.src[0].children) <= 1:
|
||||
x.op.src[0].children.discard(x)
|
||||
x = cast(LazyBuffer, x.op.src[0])
|
||||
new_srcs.append(x)
|
||||
return new_srcs[0].e(op, *new_srcs[1:], arg=arg).contiguous()
|
||||
|
||||
if MERGE_ELEMENTWISE_OPS:
|
||||
# remove the buffers from any (childless) BinaryOps that feed into this
|
||||
_srcs = tuple([x.op if x.optype == BinaryOps and not x.children and not x.realized else x for x in srcs])
|
||||
@@ -329,7 +318,7 @@ class LazyBuffer:
|
||||
|
||||
def _movement_op(self, st: ShapeTracker, op: MovementOps, arg: Union[Tuple[sint, ...], Tuple[Tuple[sint, sint], ...]]) -> LazyBuffer:
|
||||
if SHUFFLE_MOVEMENT_OPS and not self.realized and self.optype == BinaryOps and not self.children:
|
||||
if op in {MovementOps.SHRINK, MovementOps.STRIDE, MovementOps.PERMUTE} or (op == MovementOps.RESHAPE and (self.op.op in UnaryOps or PUSH_RESHAPES)):
|
||||
if op in {MovementOps.SHRINK, MovementOps.STRIDE, MovementOps.PERMUTE} or (op == MovementOps.RESHAPE and self.op.op in UnaryOps):
|
||||
return self.op.replace_with_movement_ops([(op, arg)])
|
||||
if REMOVE_MOVEMENT_NOPS and not self.realized and st.contiguous:
|
||||
# MovementOps aren't stacked any more, they each have one parent, find the root
|
||||
|
||||
Reference in New Issue
Block a user