s/get_linearizer/get_kernel [run_process_replay] (#5467)

This commit is contained in:
chenyu
2024-07-13 20:32:22 -04:00
committed by GitHub
parent 0345577032
commit 28972418c4
11 changed files with 25 additions and 25 deletions

View File

@@ -56,8 +56,8 @@ st_0 = LazyOp(BufferOps.STORE, (alu,), MemBuffer(0, dtypes.int32, ShapeTracker.f
 sink = LazyOp(MetaOps.SINK, (st_0,))

 # convert the computation to a "linearized" format (print the format)
-from tinygrad.engine.realize import get_linearizer, CompiledRunner
-lin = get_linearizer(Device[DEVICE].renderer, sink).linearize()
+from tinygrad.engine.realize import get_kernel, CompiledRunner
+lin = get_kernel(Device[DEVICE].renderer, sink).linearize()
 for u in lin.uops: print(u)

 # compile a program (and print the source)

View File

@@ -6,7 +6,7 @@ Device.DEFAULT = "CLANG"
 from train_gpt2 import GPT, GPTConfig
 from tinygrad.helpers import dedup, to_function_name, flatten, getenv, GRAPH, GlobalCounters, ansilen, to_function_name
 from tinygrad.engine.schedule import create_schedule, memory_planner
-from tinygrad.engine.realize import get_linearizer, run_schedule
+from tinygrad.engine.realize import get_kernel, run_schedule
 from tinygrad.ops import BufferOps, MetaOps

 TIMING = getenv("TIMING")
@@ -46,7 +46,7 @@ if __name__ == "__main__":
   ast_dedup = dedup([si.ast for si in sched if si.ast[0].op is BufferOps.STORE])
   srcs = {}
   for ast in ast_dedup:
-    k = get_linearizer(Device["CLANG"].renderer, ast)
+    k = get_kernel(Device["CLANG"].renderer, ast)
     k.linearize()
     src = Device["CLANG"].renderer.render(to_function_name(k.name), k.uops)
     srcs[ast] = (k.name, src)

View File

@@ -3,7 +3,7 @@ import numpy as np
 import math, random
 from tinygrad.tensor import Tensor
 from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict
-from tinygrad.engine.search import actions, bufs_from_lin, time_linearizer, get_linearizer_actions
+from tinygrad.engine.search import actions, bufs_from_lin, time_linearizer, get_kernel_actions
 from tinygrad.nn.optim import Adam
 from extra.optimization.extract_policynet import PolicyNet
 from extra.optimization.helpers import load_worlds, ast_str_to_lin, lin_to_feats
@@ -32,7 +32,7 @@ if __name__ == "__main__":
     # mask valid actions
     valid_action_mask = np.zeros((len(actions)+1), dtype=np.float32)
-    for x in get_linearizer_actions(lin): valid_action_mask[x] = 1
+    for x in get_kernel_actions(lin): valid_action_mask[x] = 1
     probs *= valid_action_mask
     probs /= sum(probs)

View File

@@ -1,6 +1,6 @@
 from typing import List, Tuple
 from tinygrad.codegen.kernel import Kernel
-from tinygrad.engine.search import get_linearizer_actions, actions
+from tinygrad.engine.search import get_kernel_actions, actions

 _net = None
 def beam_q_estimate(beam:List[Tuple[Kernel, float]]) -> List[Tuple[Kernel, float]]:
@@ -19,7 +19,7 @@ def beam_q_estimate(beam:List[Tuple[Kernel, float]]) -> List[Tuple[Kernel, float
   base_tms = []
   for lin,tm in beam:
     lin_feats = lin_to_feats(lin)
-    for a,v in get_linearizer_actions(lin, include_0=False).items():
+    for a,v in get_kernel_actions(lin, include_0=False).items():
       acts = np.zeros(len(actions))
       acts[a-1] = 1.0
       feats.append(np.concatenate([lin_feats, acts]))

View File

@@ -6,7 +6,7 @@ from copy import deepcopy
 from tinygrad.helpers import getenv, colored
 from tinygrad.tensor import Tensor
 from tinygrad.nn.state import get_parameters, get_state_dict, safe_save, safe_load, load_state_dict
-from tinygrad.engine.search import bufs_from_lin, time_linearizer, actions, get_linearizer_actions
+from tinygrad.engine.search import bufs_from_lin, time_linearizer, actions, get_kernel_actions
 from extra.optimization.helpers import load_worlds, ast_str_to_lin, lin_to_feats
 from extra.optimization.extract_policynet import PolicyNet
 from extra.optimization.pretrain_valuenet import ValueNet
@@ -43,7 +43,7 @@ if __name__ == "__main__":
     while 1:
       if VALUE:
         acts,feats = [], []
-        for k,v in get_linearizer_actions(lin).items():
+        for k,v in get_kernel_actions(lin).items():
           acts.append(k)
           feats.append(lin_to_feats(v))
         preds = net(Tensor(feats))

View File

@@ -1,5 +1,5 @@
 from extra.optimization.helpers import load_worlds, ast_str_to_lin
-from tinygrad.engine.search import bufs_from_lin, time_linearizer, get_linearizer_actions
+from tinygrad.engine.search import bufs_from_lin, time_linearizer, get_kernel_actions

 if __name__ == "__main__":
   ast_strs = load_worlds()
@@ -9,7 +9,7 @@ if __name__ == "__main__":
     test_tm = time_linearizer(lin, rawbufs)
     if test_tm < 1e-2: continue
     print(f"EXAMPLE {i}")
-    acted_lins = get_linearizer_actions(lin)
+    acted_lins = get_kernel_actions(lin)
     ok_avg, short_avg = 0, 0
     for k,v in acted_lins.items():
       tm1 = time_linearizer(v, rawbufs)

View File

@@ -9,7 +9,7 @@ from tinygrad.tensor import _to_np_dtype
 from tinygrad.codegen.kernel import Kernel
 from tinygrad.codegen.uops import UOp
 from tinygrad.codegen.kernel import Opt, OptOps
-from tinygrad.engine.search import get_linearizer_actions, bufs_from_lin
+from tinygrad.engine.search import get_kernel_actions, bufs_from_lin
 from tinygrad.engine.graph import print_tree
 from tinygrad.engine.realize import CompiledRunner
 from tinygrad.helpers import getenv, from_mv, prod, colored, Context, DEBUG
@@ -142,7 +142,7 @@ def fuzz_linearizer(lin: Kernel, rtol=1e-2, atol=1e-2):
   for depth in range(getenv("DEPTH", 1 if FUZZ_ALL_ACTIONS else 10)):
     next_lins = []
     for lin in last_lins:
-      actions = get_linearizer_actions(lin, include_0=False)
+      actions = get_kernel_actions(lin, include_0=False)
       if not actions: continue

       if depth == 0 and getenv("FUZZ_REQUIRE_TC", 0):
         tc_acts = {i: k for k in actions.values() if k.applied_opts[0].op == OptOps.TC}

View File

@@ -564,10 +564,10 @@ class TestLinearizer(unittest.TestCase):
       if golden_result is None: golden_result = np.frombuffer(real_bufs[0].as_buffer(), _to_np_dtype(real_bufs[0].dtype))
       np.testing.assert_allclose(result, golden_result, atol=0.1, rtol=0.15)

-    # check that get_linearizer_actions produces all 9 options
-    from tinygrad.engine.search import get_linearizer_actions
-    tc_actions = [k for i, k in get_linearizer_actions(Kernel(realized_ast), False).items() if k.applied_opts[0].op == OptOps.TC]
-    assert len(tc_actions) == 9, f"get_linearizer_actions should contain 9 possible TC actions, only got {len(tc_actions)}"
+    # check that get_kernel_actions produces all 9 options
+    from tinygrad.engine.search import get_kernel_actions
+    tc_actions = [k for i, k in get_kernel_actions(Kernel(realized_ast), False).items() if k.applied_opts[0].op == OptOps.TC]
+    assert len(tc_actions) == 9, f"get_kernel_actions should contain 9 possible TC actions, only got {len(tc_actions)}"

   @unittest.skipUnless(Device[Device.DEFAULT].renderer.tensor_cores, "test requires tensor cores")
   def test_tensor_cores_unroll_phi(self):

View File

@@ -65,13 +65,13 @@ class TestBEAM(unittest.TestCase):
     capturing.clear()
     self.assertNotEqual(k_beam_0[-1].prg.p.src, k_beam_1[-1].prg.p.src)

-  def test_get_linearizer_actions(self):
+  def test_get_kernel_actions(self):
     from test.test_linearizer import helper_realized_ast
     a = Tensor.rand(4, 3)
     b = Tensor.rand(3)
     realized_ast, _ = helper_realized_ast(a @ b)
-    from tinygrad.engine.search import get_linearizer_actions
-    lins = get_linearizer_actions(Kernel(realized_ast), False).values()
+    from tinygrad.engine.search import get_kernel_actions
+    lins = get_kernel_actions(Kernel(realized_ast), False).values()

     # ensure amt=0 are not duplicated
     if Opt(OptOps.UPCAST, 0, 0) in actions:

View File

@@ -13,7 +13,7 @@ from tinygrad.engine.schedule import ScheduleItem
 # **************** Program Creation ****************

 logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
-def get_linearizer(renderer:Renderer, ast:LazyOp) -> Kernel:
+def get_kernel(renderer:Renderer, ast:LazyOp) -> Kernel:
   if DEBUG >= 5:
     from tinygrad.engine.graph import print_tree
     print_tree(ast)
@@ -157,7 +157,7 @@ def get_runner(dname:str, ast:LazyOp) -> CompiledRunner:
   if bret:=method_cache.get(bkey):
     method_cache[ckey] = ret = CompiledRunner(replace(bret.p, dname=dname), bret.lib)
   else:
-    prg: Program = get_linearizer(Device[dname].renderer, ast).to_program()
+    prg: Program = get_kernel(Device[dname].renderer, ast).to_program()
     if hasattr(prg.uops, "_fuzz_paths"):
       from test.external.fuzz_uops import UOpsFuzzerRunner
       return UOpsFuzzerRunner(replace(prg, dname=dname))

View File

@@ -97,7 +97,7 @@ def bufs_from_lin(lin:Kernel, allocate:bool=True) -> List[Buffer]:
   return cast(List[Buffer], rawbufs)

 # get dictionary of all possible actions
-def get_linearizer_actions(lin:Kernel, include_0=True) -> Dict[int, Kernel]:
+def get_kernel_actions(lin:Kernel, include_0=True) -> Dict[int, Kernel]:
   acted_lins, max_up, max_lcl = {0:lin} if include_0 else {}, getenv("BEAM_UPCAST_MAX", 256), getenv("BEAM_LOCAL_MAX", 1024)
   for i,a in enumerate(actions):
     if a.axis is not None and a.op is not OptOps.TC:
@@ -140,7 +140,7 @@ def beam_search(lin:Kernel, rawbufs:List[Buffer], amt:int, allow_test_size=True)
   exiting, st = False, time.perf_counter()
   dev = Device[lin.opts.device]
   while not exiting:
-    acted_lins: List[Kernel] = flatten([get_linearizer_actions(lin, include_0=False).values() for lin,_ in beam])
+    acted_lins: List[Kernel] = flatten([get_kernel_actions(lin, include_0=False).values() for lin,_ in beam])
     timed_lins: List[Tuple[Kernel, float]] = []
     _compile_fn = functools.partial(_try_compile_linearized_w_idx, compiler=dev.compiler)
     for i,proc in (map(_compile_fn, enumerate(acted_lins)) if beam_pool is None else beam_pool.imap_unordered(_compile_fn, enumerate(acted_lins))):