switch get_kernel -> get_program [pr] (#10817)

* switch get_kernel -> get_program [pr]

* fix tests
George Hotz, 2025-06-15 12:26:50 -07:00 (committed by GitHub)
parent a36b09a715
commit 5dc1bc6070
4 changed files with 12 additions and 21 deletions
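
The rename collapses the old two-step flow (build a Kernel, then linearize/convert it) into a single call. A minimal before/after sketch, assuming ast is a SINK UOp taken from a ScheduleItem:

from tinygrad import Device
from tinygrad.engine.realize import get_program
# before this PR: get_kernel returned a Kernel the caller still had to convert
#   prg = get_kernel(Device[Device.DEFAULT].renderer, ast).to_program()
# after this PR: the same optimization pipeline runs, returning the ProgramSpec directly
prg = get_program(Device[Device.DEFAULT].renderer, ast)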


@@ -59,11 +59,11 @@ st_0 = UOp(Ops.STORE, dtypes.void, (output_buf.view(ShapeTracker.from_shape((1,)
 s = UOp(Ops.SINK, dtypes.void, (st_0,))
 # convert the computation to a "linearized" format (print the format)
-from tinygrad.engine.realize import get_kernel, CompiledRunner
-kernel = get_kernel(Device[DEVICE].renderer, s).linearize()
+from tinygrad.engine.realize import get_program, CompiledRunner
+program = get_program(Device[DEVICE].renderer, s)
 # compile a program (and print the source)
-fxn = CompiledRunner(kernel.to_program())
+fxn = CompiledRunner(program)
 print(fxn.p.src)
 # NOTE: fxn.clprg is the CPUProgram
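
A self-contained sketch of the updated docs flow above; the Tensor-based setup is an assumption standing in for the hand-built UOp graph in the docs file:

from tinygrad import Tensor, Device
from tinygrad.engine.realize import get_program, CompiledRunner
si = (Tensor([2])+Tensor([2])).schedule()[-1]   # the last ScheduleItem holds the SINK ast
program = get_program(Device[Device.DEFAULT].renderer, si.ast)
fxn = CompiledRunner(program)                   # compile the rendered source
print(fxn.p.src)                                # fxn.p is the ProgramSpec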


@@ -1,7 +1,7 @@
 import gc
 from tinygrad import Tensor, UOp, Device
 from tinygrad.shape.shapetracker import views_to_indexed_uops
-from tinygrad.engine.realize import method_cache, get_kernel
+from tinygrad.engine.realize import method_cache, get_program
 def uops_allocated(): return sum([isinstance(x, UOp) for x in gc.get_objects()])
 def print_uops():
@@ -14,12 +14,10 @@ def two_plus_two(): Tensor([2])+Tensor([2])
 def two_plus_two_schedule(): (Tensor([2])+Tensor([2])).schedule()
 def two_plus_two_kernel():
   si = (Tensor([2])+Tensor([2])).schedule()[-1]
-  get_kernel(Device.default.renderer, si.ast)
+  get_program(Device.default.renderer, si.ast)
 def two_plus_two_linearize():
   si = (Tensor([2])+Tensor([2])).schedule()[-1]
-  k = get_kernel(Device.default.renderer, si.ast)
-  k.get_optimized_ast()
-  #k.linearize()
+  get_program(Device.default.renderer, si.ast)
 def two_plus_two_realize(): (Tensor([2])+Tensor([2])).realize()
 def two_plus_two_item(): (Tensor([2])+Tensor([2])).item()
 def gradient_test():
@@ -36,7 +34,7 @@ def kernel_matmul():
   y = Tensor([[2.0,0,-2.0]], requires_grad=True)
   z = y.matmul(x)
   si = z.schedule()[-1]
-  get_kernel(Device.default.renderer, si.ast)
+  get_program(Device.default.renderer, si.ast)
 def realized_matmul():
   x = Tensor.eye(3, requires_grad=True)
   y = Tensor([[2.0,0,-2.0]], requires_grad=True)
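
With get_kernel gone, two_plus_two_kernel and two_plus_two_linearize now exercise the same code path. A hedged sketch of the leak check this gc-benchmark file is built around (the delta print is an assumption about intent, not copied from the file):

import gc
from tinygrad import Tensor, UOp, Device
from tinygrad.engine.realize import get_program
def uops_allocated(): return sum(isinstance(x, UOp) for x in gc.get_objects())
base = uops_allocated()
si = (Tensor([2])+Tensor([2])).schedule()[-1]
get_program(Device.default.renderer, si.ast)    # build and discard a program
del si
gc.collect()
print(uops_allocated() - base)                  # expected near zero if codegen leaks no UOps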


@@ -4,14 +4,14 @@ import numpy as np
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View # noqa F401
 from tinygrad.tensor import Tensor, _to_np_dtype
-from tinygrad.helpers import CI, DEBUG, getenv, Context, Timing
+from tinygrad.helpers import CI, DEBUG, getenv, Timing
 from tinygrad.dtype import dtypes, DType
 from tinygrad.device import Buffer, Device
 from tinygrad.uop.ops import Ops, UOp, UPat, KernelInfo, exec_alu # noqa F401
 from tinygrad.uop.spec import spec
 from tinygrad.renderer import ProgramSpec
 from tinygrad.engine.grouper import fix_kernel_ops
-from tinygrad.engine.realize import CompiledRunner, get_kernel
+from tinygrad.engine.realize import CompiledRunner
 from tinygrad.codegen import full_rewrite
 from tinygrad.uop.symbolic import sym
 from tinygrad.device import is_dtype_supported
@@ -461,13 +461,6 @@ class TestUOpStr(unittest.TestCase):
     assert len(str(a)) < 10_000, "exponential string growth"
     assert str(eval(str(a))) == str(a)
-    t = Tensor.arange(10)
-    t = t + t * Tensor.rand(10)
-    # nice big complicated uop
-    with Context(NOOPT=1):
-      sink = UOp(Ops.SINK, dtypes.void, (get_kernel(Device[Device.DEFAULT].renderer, t.schedule()[-1].ast).linearize().uops[-1],))
-    self.assertEqual(sink, eval(str(sink)))
   def test_vectorized_str(self):
     vec = UOp(Ops.VECTORIZE, dtypes.int.vec(4), tuple(UOp.const(dtypes.int, x) for x in range(4)))
     assert str(eval(str(vec))) == str(vec)
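
The deleted block round-tripped a fully linearized kernel through str/eval, which required get_kernel(...).linearize(); the surviving tests keep the same repr property on hand-built UOps, e.g.:

from tinygrad.uop.ops import Ops, UOp
from tinygrad.dtype import dtypes
vec = UOp(Ops.VECTORIZE, dtypes.int.vec(4), tuple(UOp.const(dtypes.int, x) for x in range(4)))
assert str(eval(str(vec))) == str(vec)          # repr must stay eval-able and stable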


@@ -13,7 +13,7 @@ from tinygrad.engine.schedule import ScheduleItem
 # **************** Program Creation ****************
 logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
-def get_kernel(renderer:Renderer, ast:UOp) -> Kernel:
+def get_program(renderer:Renderer, ast:UOp) -> ProgramSpec:
   k = Kernel(ast, opts=renderer)
   if not NOOPT:
     if not k.apply_tensor_cores(getenv("TC", 1)): k.apply_opts(hand_coded_optimizations(k))
@@ -23,7 +23,7 @@ def get_kernel(renderer:Renderer, ast:UOp) -> Kernel:
       rawbufs = bufs_from_lin(kb, allocate=False)
       k = beam_search(kb, rawbufs, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))
   if logkerns is not None: logkerns.writelines([f"{(k.ast, k.applied_opts)}\n"])
-  return k
+  return k.to_program()
 # **************** Runners ****************
@@ -109,7 +109,7 @@ def get_runner(device:str, ast:UOp) -> CompiledRunner:
   if bret:=method_cache.get(bkey):
     method_cache[ckey] = ret = CompiledRunner(replace(bret.p, device=device), bret.lib)
   else:
-    prg: ProgramSpec = get_kernel(Device[device].renderer, ast).to_program()
+    prg: ProgramSpec = get_program(Device[device].renderer, ast)
     method_cache[ckey] = method_cache[bkey] = ret = CompiledRunner(replace(prg, device=device))
   return ret
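
get_program keeps the knobs the old get_kernel honored (NOOPT, TC, BEAM, LOGKERNS), and get_runner still memoizes through method_cache. A sketch of both behaviors, assuming the post-PR API:

from tinygrad import Tensor, Device
from tinygrad.helpers import Context
from tinygrad.engine.realize import get_program, get_runner
si = (Tensor([2])+Tensor([2])).schedule()[-1]
with Context(NOOPT=1):                          # take the NOOPT branch above: no hand-coded opts
  prg = get_program(Device[Device.DEFAULT].renderer, si.ast)
print(prg.src)                                  # the rendered source in the ProgramSpec
r1 = get_runner(Device.DEFAULT, si.ast)
r2 = get_runner(Device.DEFAULT, si.ast)         # second call should hit method_cache
assert r1 is r2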