mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-01-09 15:08:02 -05:00
linearizer ast as a tuple of lazyops (#3689)
* multi store op linearizer
* currently we do only one output per kernel
* named opts
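The heart of the diff is the Kernel constructor change below: the single ast argument becomes a *ast varargs tuple of LazyOps, which makes opts keyword-only in practice and forces every call site onto opts=. A minimal before/after sketch of why (toy stand-ins, not the real class):

class KernelBefore:
  def __init__(self, ast, opts=None): self.ast, self.opts = ast, opts

class KernelAfter:
  def __init__(self, *ast, opts=None): self.ast, self.opts = ast[0], opts

KernelBefore("AST", "OPTS")      # fine before: opts == "OPTS"
KernelAfter("AST", opts="OPTS")  # the new spelling: opts == "OPTS"
KernelAfter("AST", "OPTS")       # trap: "OPTS" is packed into *ast and opts stays None
                                 # (the real class's new asserts would reject it)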
@@ -240,7 +240,7 @@ result = Tensor(2.0).realize() + Tensor(3.0).realize()
 from tinygrad.codegen.linearizer import Linearizer
 from tinygrad.realize import create_schedule
 sched = create_schedule([result.lazydata])
-linearizer = Linearizer(sched[-1].ast, ClangCompiler.linearizer_opts)
+linearizer = Linearizer(sched[-1].ast, opts=ClangCompiler.linearizer_opts)
 linearizer.linearize()
 
 # print the uops
@@ -43,18 +43,18 @@ if __name__ == "__main__":
     lins:List[Linearizer] = []
 
     # always try hand coded opt
-    lin = Linearizer(si.ast, device.compiler.linearizer_opts)
+    lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
     lin.hand_coded_optimizations()
     lins.append(lin)
 
     # maybe try tensor cores
-    lin = Linearizer(si.ast, device.compiler.linearizer_opts)
+    lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
     if lin.apply_tensor_cores():
       lins.append(lin)
 
     # try a beam search
     if beam:=getenv("BEAM"):
-      lin = Linearizer(si.ast, device.compiler.linearizer_opts)
+      lin = Linearizer(si.ast, opts=device.compiler.linearizer_opts)
       lin = beam_search(lin, rawbufs, beam, bool(getenv("BEAM_ESTIMATE", 1)))
       lins.append(lin)
 
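This hunk only renames the opts argument, but it shows the candidate-collection pattern: a hand-coded linearizer, optionally a tensor-core one, optionally a beam-searched one, all appended to lins. A hedged sketch of how such candidates can then be compared, using time_linearizer from tinygrad.features.search (the script's actual selection code is outside the shown context):

from tinygrad.features.search import time_linearizer
# time each candidate on the real buffers and keep the fastest
best = min(lins, key=lambda lin: time_linearizer(lin, rawbufs))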
@@ -9,7 +9,7 @@ inf, nan = float('inf'), float('nan')
 # kernel unpacker
 from tinygrad.codegen.linearizer import Linearizer
 def ast_str_to_ast(ast_str:str) -> LazyOp: return eval(ast_str)
-def ast_str_to_lin(ast_str:str, opts=None): return Linearizer(ast_str_to_ast(ast_str), opts)
+def ast_str_to_lin(ast_str:str, opts=None): return Linearizer(ast_str_to_ast(ast_str), opts=opts)
 
 # load worlds, a dataset of about 12k kernels
 import gzip
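A hedged usage sketch of the kernel unpacker above; load_worlds is assumed to be the loader for the 12k-kernel dataset mentioned in the context, and the keyword call is exactly what the fixed ast_str_to_lin now guarantees:

ast_strs = load_worlds()          # assumed helper from this file
lin = ast_str_to_lin(ast_strs[0]) # wraps the eval'd LazyOp in a Linearizer
lin.linearize()                   # lower the kernel to uops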
@@ -204,7 +204,7 @@ class TestLinearizer(unittest.TestCase):
                      ConstBuffer(42, dtypes.float, ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),))))
     ast = LazyOp(BufferOps.STORE, (ast,),
                  MemBuffer(0, dtypes.float, ShapeTracker(views=(View(shape=(), strides=(), offset=0, mask=None, contiguous=True),))))
-    lin = Linearizer(ast=ast) # this is a dummy ast
+    lin = Linearizer(ast) # this is a dummy ast
 
     lin.uops = UOpGraph()
     return lin.uops.add(uop, dtype, vin, arg, cachable=False)
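The test drops the ast= keyword because a *args parameter can never be filled by name; under the new signature, Linearizer(ast=ast) raises a TypeError. A toy demonstration (not the real Linearizer):

def lin(*ast, opts=None): return ast, opts

print(lin("node"))                 # (('node',), None)
try: lin(ast="node")
except TypeError as e: print(e)    # lin() got an unexpected keyword argument 'ast'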
@@ -86,11 +86,13 @@ class LinearizerOptions(NamedTuple):
   local_max: Optional[List[int]] = None
 
 class Kernel:
-  def __init__(self, ast:LazyOp, opts:Optional[LinearizerOptions]=None):
+  def __init__(self, *ast:LazyOp, opts:Optional[LinearizerOptions]=None):
     self.opts = opts or (device.compiler.linearizer_opts if isinstance(device:=Device[Device.DEFAULT], Compiled) and device.compiler is not None else
                          LinearizerOptions(Device.DEFAULT))
-    self.ast = ast
-    assert ast.op == BufferOps.STORE, f"kernels must have a store as the output, got {ast.op}"
+    assert all(op.op is BufferOps.STORE for op in ast), f"kernels must have stores as the output, got {ast}"
+    assert len(set(op.arg.st.size for op in ast)) == 1, f"all outbufs should have the same size, got {[op.arg.st for op in ast]}"
+    assert len(ast) == 1, "max one output per kernel"
+    self.ast = ast[0]
 
     # fetch lazyop info
     self.info: FlopCounter = get_lazyop_info(self.ast)
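The new asserts spell out the multi-output contract: every output is a STORE, all output buffers share one size, and, for now, exactly one output is accepted. A toy reproduction of the guards (stand-in types, not tinygrad's):

class Store:
  def __init__(self, size): self.op, self.size = "STORE", size

def check(*ast):
  assert all(o.op == "STORE" for o in ast), "kernels must have stores as the output"
  assert len(set(o.size for o in ast)) == 1, "all outbufs should have the same size"
  assert len(ast) == 1, "max one output per kernel"

check(Store(16))                       # passes
try: check(Store(16), Store(16))
except AssertionError as e: print(e)   # max one output per kernel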
@@ -248,16 +248,16 @@ class Compiled:
       from tinygrad.features.graph import print_tree
       print_tree(ast)
     from tinygrad.codegen.linearizer import Linearizer
-    k = Linearizer(ast, self.compiler.linearizer_opts)
+    k = Linearizer(ast, opts=self.compiler.linearizer_opts)
     k.required_optimizations()
     if not NOOPT:
       if not (used_tensor_cores:=k.apply_tensor_cores(getenv("TC", 1))): k.hand_coded_optimizations()
       if BEAM >= 1:
         lins = [(("tc" if used_tensor_cores else "hc"), k)]
         if used_tensor_cores:
-          lins.append(("hc", Linearizer(ast, self.compiler.linearizer_opts)))
+          lins.append(("hc", Linearizer(ast, opts=self.compiler.linearizer_opts)))
           lins[-1][1].hand_coded_optimizations()
-        kb = Linearizer(ast, self.compiler.linearizer_opts)
+        kb = Linearizer(ast, opts=self.compiler.linearizer_opts)
         kb.required_optimizations()
         from tinygrad.features.search import beam_search, time_linearizer, bufs_from_lin
         test_rawbuffers = bufs_from_lin(kb) # allocate scratch buffers for optimization