diff --git a/tinygrad/codegen/late/linearizer.py b/tinygrad/codegen/late/linearizer.py index 45b53204fb..24ba9ff005 100644 --- a/tinygrad/codegen/late/linearizer.py +++ b/tinygrad/codegen/late/linearizer.py @@ -1,21 +1,27 @@ import heapq from collections import defaultdict from tinygrad.uop.ops import PatternMatcher, UOp, Ops, UPat +from tinygrad.helpers import prod def linearize(u:UOp) -> list[UOp]: # this is a toposort with priority lst = list(u.toposort()) consumers: defaultdict[UOp, list[UOp]] = defaultdict(list) in_degree:dict[UOp, int] = {} - priorities:dict[UOp, int] = {} + priorities:dict[UOp, tuple[int, int]] = {} # get consumers and assign priorities # NOTE: this requires the lst be locally toposorted for u in reversed(lst): for s in u.src: consumers[s].append(u) in_degree[u] = len(u.src) + + # we place UOps with higher run_counts later + # this will cause ranges to be placed late and ends to be placed early + run_count = prod([int(r.vmax)+1 for r in u.ranges]) + # put loads in the beginning of the block and prevent priority inversion. hack for BARRIER grouping too - priority = [0] + [priorities[x] for x in consumers[u]] + priority = [0] + [priorities[x][1] for x in consumers[u]] if u.op is Ops.LOAD: priority.append(-1000) if u.op is Ops.BARRIER: priority.append(-1500) # ranges are scheduled as late as possible so anything that can be outside is @@ -23,7 +29,7 @@ def linearize(u:UOp) -> list[UOp]: if u.op is Ops.END: priority = [-1000] # move defines and consts to the top if u.op in {Ops.DEFINE_GLOBAL, Ops.DEFINE_LOCAL, Ops.DEFINE_REG, Ops.DEFINE_VAR, Ops.SPECIAL, Ops.CONST}: priority.append(-2000) - priorities[u] = min(priority) + priorities[u] = (run_count, min(priority)) # number the uops in "ideal" order nkey = {u:i for i,u in enumerate(sorted(lst, key=lambda x: (priorities[x],)+x.tuplize))}