switch get_kernel -> get_program [pr] (#10817)

* switch get_kernel -> get_program [pr]

* fix tests
Author: George Hotz
Date: 2025-06-15 12:26:50 -07:00 (committed by GitHub)
Commit: 5dc1bc6070 (parent a36b09a715)
4 changed files with 12 additions and 21 deletions
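
The rename is more than cosmetic: get_kernel returned a Kernel that callers had to finish themselves (.linearize(), then .to_program()), while get_program folds those steps in and returns a ProgramSpec directly. A minimal before/after sketch of the call-site migration, pieced together from the hunks below (the ScheduleItem plumbing mirrors the test code):

    from tinygrad import Tensor, Device
    from tinygrad.engine.realize import get_program

    # before: prg = get_kernel(Device[Device.DEFAULT].renderer, si.ast).to_program()
    # after: one call returns the ProgramSpec
    si = (Tensor([2]) + Tensor([2])).schedule()[-1]   # a ScheduleItem
    prg = get_program(Device[Device.DEFAULT].renderer, si.ast)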

File 1 of 4

@@ -59,11 +59,11 @@ st_0 = UOp(Ops.STORE, dtypes.void, (output_buf.view(ShapeTracker.from_shape((1,)
 s = UOp(Ops.SINK, dtypes.void, (st_0,))
 
 # convert the computation to a "linearized" format (print the format)
-from tinygrad.engine.realize import get_kernel, CompiledRunner
-kernel = get_kernel(Device[DEVICE].renderer, s).linearize()
+from tinygrad.engine.realize import get_program, CompiledRunner
+program = get_program(Device[DEVICE].renderer, s)
 
 # compile a program (and print the source)
-fxn = CompiledRunner(kernel.to_program())
+fxn = CompiledRunner(program)
 print(fxn.p.src)
 # NOTE: fxn.clprg is the CPUProgram

File 2 of 4

@@ -1,7 +1,7 @@
 import gc
 from tinygrad import Tensor, UOp, Device
 from tinygrad.shape.shapetracker import views_to_indexed_uops
-from tinygrad.engine.realize import method_cache, get_kernel
+from tinygrad.engine.realize import method_cache, get_program
 def uops_allocated(): return sum([isinstance(x, UOp) for x in gc.get_objects()])
 def print_uops():
@@ -14,12 +14,10 @@ def two_plus_two(): Tensor([2])+Tensor([2])
 def two_plus_two_schedule(): (Tensor([2])+Tensor([2])).schedule()
 def two_plus_two_kernel():
   si = (Tensor([2])+Tensor([2])).schedule()[-1]
-  get_kernel(Device.default.renderer, si.ast)
+  get_program(Device.default.renderer, si.ast)
 def two_plus_two_linearize():
   si = (Tensor([2])+Tensor([2])).schedule()[-1]
-  k = get_kernel(Device.default.renderer, si.ast)
-  k.get_optimized_ast()
-  #k.linearize()
+  get_program(Device.default.renderer, si.ast)
 def two_plus_two_realize(): (Tensor([2])+Tensor([2])).realize()
 def two_plus_two_item(): (Tensor([2])+Tensor([2])).item()
 def gradient_test():
@@ -36,7 +34,7 @@ def kernel_matmul():
   y = Tensor([[2.0,0,-2.0]], requires_grad=True)
   z = y.matmul(x)
   si = z.schedule()[-1]
-  get_kernel(Device.default.renderer, si.ast)
+  get_program(Device.default.renderer, si.ast)
 def realized_matmul():
   x = Tensor.eye(3, requires_grad=True)
   y = Tensor([[2.0,0,-2.0]], requires_grad=True)

File 3 of 4

@@ -4,14 +4,14 @@ import numpy as np
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View # noqa F401
 from tinygrad.tensor import Tensor, _to_np_dtype
-from tinygrad.helpers import CI, DEBUG, getenv, Context, Timing
+from tinygrad.helpers import CI, DEBUG, getenv, Timing
 from tinygrad.dtype import dtypes, DType
 from tinygrad.device import Buffer, Device
 from tinygrad.uop.ops import Ops, UOp, UPat, KernelInfo, exec_alu # noqa F401
 from tinygrad.uop.spec import spec
 from tinygrad.renderer import ProgramSpec
 from tinygrad.engine.grouper import fix_kernel_ops
-from tinygrad.engine.realize import CompiledRunner, get_kernel
+from tinygrad.engine.realize import CompiledRunner
 from tinygrad.codegen import full_rewrite
 from tinygrad.uop.symbolic import sym
 from tinygrad.device import is_dtype_supported
@@ -461,13 +461,6 @@ class TestUOpStr(unittest.TestCase):
     assert len(str(a)) < 10_000, "exponential string growth"
     assert str(eval(str(a))) == str(a)
 
-    t = Tensor.arange(10)
-    t = t + t * Tensor.rand(10)
-    # nice big complicated uop
-    with Context(NOOPT=1):
-      sink = UOp(Ops.SINK, dtypes.void, (get_kernel(Device[Device.DEFAULT].renderer, t.schedule()[-1].ast).linearize().uops[-1],))
-    self.assertEqual(sink, eval(str(sink)))
-
   def test_vectorized_str(self):
     vec = UOp(Ops.VECTORIZE, dtypes.int.vec(4), tuple(UOp.const(dtypes.int, x) for x in range(4)))
     assert str(eval(str(vec))) == str(vec)

File 4 of 4

@@ -13,7 +13,7 @@ from tinygrad.engine.schedule import ScheduleItem
 # **************** Program Creation ****************
 
 logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
-def get_kernel(renderer:Renderer, ast:UOp) -> Kernel:
+def get_program(renderer:Renderer, ast:UOp) -> ProgramSpec:
   k = Kernel(ast, opts=renderer)
   if not NOOPT:
     if not k.apply_tensor_cores(getenv("TC", 1)): k.apply_opts(hand_coded_optimizations(k))
@@ -23,7 +23,7 @@ def get_kernel(renderer:Renderer, ast:UOp) -> Kernel:
       rawbufs = bufs_from_lin(kb, allocate=False)
       k = beam_search(kb, rawbufs, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))
   if logkerns is not None: logkerns.writelines([f"{(k.ast, k.applied_opts)}\n"])
-  return k
+  return k.to_program()
 
 # **************** Runners ****************
@@ -109,7 +109,7 @@ def get_runner(device:str, ast:UOp) -> CompiledRunner:
   if bret:=method_cache.get(bkey):
     method_cache[ckey] = ret = CompiledRunner(replace(bret.p, device=device), bret.lib)
   else:
-    prg: ProgramSpec = get_kernel(Device[device].renderer, ast).to_program()
+    prg: ProgramSpec = get_program(Device[device].renderer, ast)
     method_cache[ckey] = method_cache[bkey] = ret = CompiledRunner(replace(prg, device=device))
   return ret
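
Putting the pieces together, the consumer-side flow after this commit looks like the following sketch, assembled from the first and last hunks above (as in the docs file, fxn.p.src holds the rendered kernel source):

    from tinygrad import Tensor, Device
    from tinygrad.engine.realize import get_program, CompiledRunner

    si = (Tensor([2]) + Tensor([2])).schedule()[-1]
    program = get_program(Device[Device.DEFAULT].renderer, si.ast)  # ProgramSpec
    fxn = CompiledRunner(program)  # compiles the program for the device
    print(fxn.p.src)               # generated kernel source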