s/get_linearizer/get_kernel [run_process_replay] (#5467)

This commit is contained in:
chenyu
2024-07-13 20:32:22 -04:00
committed by GitHub
parent 0345577032
commit 28972418c4
11 changed files with 25 additions and 25 deletions

View File

@@ -56,8 +56,8 @@ st_0 = LazyOp(BufferOps.STORE, (alu,), MemBuffer(0, dtypes.int32, ShapeTracker.f
sink = LazyOp(MetaOps.SINK, (st_0,))
# convert the computation to a "linearized" format (print the format)
-from tinygrad.engine.realize import get_linearizer, CompiledRunner
-lin = get_linearizer(Device[DEVICE].renderer, sink).linearize()
+from tinygrad.engine.realize import get_kernel, CompiledRunner
+lin = get_kernel(Device[DEVICE].renderer, sink).linearize()
for u in lin.uops: print(u)
# compile a program (and print the source)

View File

@@ -6,7 +6,7 @@ Device.DEFAULT = "CLANG"
from train_gpt2 import GPT, GPTConfig
from tinygrad.helpers import dedup, to_function_name, flatten, getenv, GRAPH, GlobalCounters, ansilen, to_function_name
from tinygrad.engine.schedule import create_schedule, memory_planner
-from tinygrad.engine.realize import get_linearizer, run_schedule
+from tinygrad.engine.realize import get_kernel, run_schedule
from tinygrad.ops import BufferOps, MetaOps
TIMING = getenv("TIMING")
@@ -46,7 +46,7 @@ if __name__ == "__main__":
ast_dedup = dedup([si.ast for si in sched if si.ast[0].op is BufferOps.STORE])
srcs = {}
for ast in ast_dedup:
-k = get_linearizer(Device["CLANG"].renderer, ast)
+k = get_kernel(Device["CLANG"].renderer, ast)
k.linearize()
src = Device["CLANG"].renderer.render(to_function_name(k.name), k.uops)
srcs[ast] = (k.name, src)

View File

@@ -3,7 +3,7 @@ import numpy as np
import math, random
from tinygrad.tensor import Tensor
from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict
-from tinygrad.engine.search import actions, bufs_from_lin, time_linearizer, get_linearizer_actions
+from tinygrad.engine.search import actions, bufs_from_lin, time_linearizer, get_kernel_actions
from tinygrad.nn.optim import Adam
from extra.optimization.extract_policynet import PolicyNet
from extra.optimization.helpers import load_worlds, ast_str_to_lin, lin_to_feats
@@ -32,7 +32,7 @@ if __name__ == "__main__":
# mask valid actions
valid_action_mask = np.zeros((len(actions)+1), dtype=np.float32)
-for x in get_linearizer_actions(lin): valid_action_mask[x] = 1
+for x in get_kernel_actions(lin): valid_action_mask[x] = 1
probs *= valid_action_mask
probs /= sum(probs)

View File

@@ -1,6 +1,6 @@
from typing import List, Tuple
from tinygrad.codegen.kernel import Kernel
-from tinygrad.engine.search import get_linearizer_actions, actions
+from tinygrad.engine.search import get_kernel_actions, actions
_net = None
def beam_q_estimate(beam:List[Tuple[Kernel, float]]) -> List[Tuple[Kernel, float]]:
@@ -19,7 +19,7 @@ def beam_q_estimate(beam:List[Tuple[Kernel, float]]) -> List[Tuple[Kernel, float
base_tms = []
for lin,tm in beam:
lin_feats = lin_to_feats(lin)
-for a,v in get_linearizer_actions(lin, include_0=False).items():
+for a,v in get_kernel_actions(lin, include_0=False).items():
acts = np.zeros(len(actions))
acts[a-1] = 1.0
feats.append(np.concatenate([lin_feats, acts]))

View File

@@ -6,7 +6,7 @@ from copy import deepcopy
from tinygrad.helpers import getenv, colored
from tinygrad.tensor import Tensor
from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict
-from tinygrad.engine.search import bufs_from_lin, time_linearizer, actions, get_linearizer_actions
+from tinygrad.engine.search import bufs_from_lin, time_linearizer, actions, get_kernel_actions
from extra.optimization.helpers import load_worlds, ast_str_to_lin, lin_to_feats
from extra.optimization.extract_policynet import PolicyNet
from extra.optimization.pretrain_valuenet import ValueNet
@@ -43,7 +43,7 @@ if __name__ == "__main__":
while 1:
if VALUE:
acts,feats = [], []
-for k,v in get_linearizer_actions(lin).items():
+for k,v in get_kernel_actions(lin).items():
acts.append(k)
feats.append(lin_to_feats(v))
preds = net(Tensor(feats))

View File

@@ -1,5 +1,5 @@
from extra.optimization.helpers import load_worlds, ast_str_to_lin
-from tinygrad.engine.search import bufs_from_lin, time_linearizer, get_linearizer_actions
+from tinygrad.engine.search import bufs_from_lin, time_linearizer, get_kernel_actions
if __name__ == "__main__":
ast_strs = load_worlds()
@@ -9,7 +9,7 @@ if __name__ == "__main__":
test_tm = time_linearizer(lin, rawbufs)
if test_tm < 1e-2: continue
print(f"EXAMPLE {i}")
-acted_lins = get_linearizer_actions(lin)
+acted_lins = get_kernel_actions(lin)
ok_avg, short_avg = 0, 0
for k,v in acted_lins.items():
tm1 = time_linearizer(v, rawbufs)

View File

@@ -9,7 +9,7 @@ from tinygrad.tensor import _to_np_dtype
from tinygrad.codegen.kernel import Kernel
from tinygrad.codegen.uops import UOp
from tinygrad.codegen.kernel import Opt, OptOps
-from tinygrad.engine.search import get_linearizer_actions, bufs_from_lin
+from tinygrad.engine.search import get_kernel_actions, bufs_from_lin
from tinygrad.engine.graph import print_tree
from tinygrad.engine.realize import CompiledRunner
from tinygrad.helpers import getenv, from_mv, prod, colored, Context, DEBUG
@@ -142,7 +142,7 @@ def fuzz_linearizer(lin: Kernel, rtol=1e-2, atol=1e-2):
for depth in range(getenv("DEPTH", 1 if FUZZ_ALL_ACTIONS else 10)):
next_lins = []
for lin in last_lins:
-actions = get_linearizer_actions(lin, include_0=False)
+actions = get_kernel_actions(lin, include_0=False)
if not actions: continue
if depth == 0 and getenv("FUZZ_REQUIRE_TC", 0):
tc_acts = {i: k for k in actions.values() if k.applied_opts[0].op == OptOps.TC}

View File

@@ -564,10 +564,10 @@ class TestLinearizer(unittest.TestCase):
if golden_result is None: golden_result = np.frombuffer(real_bufs[0].as_buffer(), _to_np_dtype(real_bufs[0].dtype))
np.testing.assert_allclose(result, golden_result, atol=0.1, rtol=0.15)
-# check that get_linearizer_actions produces all 9 options
-from tinygrad.engine.search import get_linearizer_actions
-tc_actions = [k for i, k in get_linearizer_actions(Kernel(realized_ast), False).items() if k.applied_opts[0].op == OptOps.TC]
-assert len(tc_actions) == 9, f"get_linearizer_actions should contain 9 possible TC actions, only got {len(tc_actions)}"
+# check that get_kernel_actions produces all 9 options
+from tinygrad.engine.search import get_kernel_actions
+tc_actions = [k for i, k in get_kernel_actions(Kernel(realized_ast), False).items() if k.applied_opts[0].op == OptOps.TC]
+assert len(tc_actions) == 9, f"get_kernel_actions should contain 9 possible TC actions, only got {len(tc_actions)}"
@unittest.skipUnless(Device[Device.DEFAULT].renderer.tensor_cores, "test requires tensor cores")
def test_tensor_cores_unroll_phi(self):

View File

@@ -65,13 +65,13 @@ class TestBEAM(unittest.TestCase):
capturing.clear()
self.assertNotEqual(k_beam_0[-1].prg.p.src, k_beam_1[-1].prg.p.src)
-def test_get_linearizer_actions(self):
+def test_get_kernel_actions(self):
from test.test_linearizer import helper_realized_ast
a = Tensor.rand(4, 3)
b = Tensor.rand(3)
realized_ast, _ = helper_realized_ast(a @ b)
-from tinygrad.engine.search import get_linearizer_actions
-lins = get_linearizer_actions(Kernel(realized_ast), False).values()
+from tinygrad.engine.search import get_kernel_actions
+lins = get_kernel_actions(Kernel(realized_ast), False).values()
# ensure amt=0 are not duplicated
if Opt(OptOps.UPCAST, 0, 0) in actions:

View File

@@ -13,7 +13,7 @@ from tinygrad.engine.schedule import ScheduleItem
# **************** Program Creation ****************
logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
-def get_linearizer(renderer:Renderer, ast:LazyOp) -> Kernel:
+def get_kernel(renderer:Renderer, ast:LazyOp) -> Kernel:
if DEBUG >= 5:
from tinygrad.engine.graph import print_tree
print_tree(ast)
@@ -157,7 +157,7 @@ def get_runner(dname:str, ast:LazyOp) -> CompiledRunner:
if bret:=method_cache.get(bkey):
method_cache[ckey] = ret = CompiledRunner(replace(bret.p, dname=dname), bret.lib)
else:
-prg: Program = get_linearizer(Device[dname].renderer, ast).to_program()
+prg: Program = get_kernel(Device[dname].renderer, ast).to_program()
if hasattr(prg.uops, "_fuzz_paths"):
from test.external.fuzz_uops import UOpsFuzzerRunner
return UOpsFuzzerRunner(replace(prg, dname=dname))

View File

@@ -97,7 +97,7 @@ def bufs_from_lin(lin:Kernel, allocate:bool=True) -> List[Buffer]:
return cast(List[Buffer], rawbufs)
# get dictionary of all possible actions
-def get_linearizer_actions(lin:Kernel, include_0=True) -> Dict[int, Kernel]:
+def get_kernel_actions(lin:Kernel, include_0=True) -> Dict[int, Kernel]:
acted_lins, max_up, max_lcl = {0:lin} if include_0 else {}, getenv("BEAM_UPCAST_MAX", 256), getenv("BEAM_LOCAL_MAX", 1024)
for i,a in enumerate(actions):
if a.axis is not None and a.op is not OptOps.TC:
@@ -140,7 +140,7 @@ def beam_search(lin:Kernel, rawbufs:List[Buffer], amt:int, allow_test_size=True)
exiting, st = False, time.perf_counter()
dev = Device[lin.opts.device]
while not exiting:
-acted_lins: List[Kernel] = flatten([get_linearizer_actions(lin, include_0=False).values() for lin,_ in beam])
+acted_lins: List[Kernel] = flatten([get_kernel_actions(lin, include_0=False).values() for lin,_ in beam])
timed_lins: List[Tuple[Kernel, float]] = []
_compile_fn = functools.partial(_try_compile_linearized_w_idx, compiler=dev.compiler)
for i,proc in (map(_compile_fn, enumerate(acted_lins)) if beam_pool is None else beam_pool.imap_unordered(_compile_fn, enumerate(acted_lins))):