diff --git a/docs/abstractions2.py b/docs/abstractions2.py
index 3c19e4631f..f5748a6208 100644
--- a/docs/abstractions2.py
+++ b/docs/abstractions2.py
@@ -59,11 +59,11 @@ st_0 = UOp(Ops.STORE, dtypes.void, (output_buf.view(ShapeTracker.from_shape((1,)
 s = UOp(Ops.SINK, dtypes.void, (st_0,))
 
 # convert the computation to a "linearized" format (print the format)
-from tinygrad.engine.realize import get_kernel, CompiledRunner
-kernel = get_kernel(Device[DEVICE].renderer, s).linearize()
+from tinygrad.engine.realize import get_program, CompiledRunner
+program = get_program(Device[DEVICE].renderer, s)
 
 # compile a program (and print the source)
-fxn = CompiledRunner(kernel.to_program())
+fxn = CompiledRunner(program)
 print(fxn.p.src)
 
 # NOTE: fxn.clprg is the CPUProgram
diff --git a/test/external/external_uop_gc.py b/test/external/external_uop_gc.py
index 03f6169b7f..f27f33be3c 100644
--- a/test/external/external_uop_gc.py
+++ b/test/external/external_uop_gc.py
@@ -1,7 +1,7 @@
 import gc
 from tinygrad import Tensor, UOp, Device
 from tinygrad.shape.shapetracker import views_to_indexed_uops
-from tinygrad.engine.realize import method_cache, get_kernel
+from tinygrad.engine.realize import method_cache, get_program
 
 def uops_allocated(): return sum([isinstance(x, UOp) for x in gc.get_objects()])
 def print_uops():
@@ -14,12 +14,10 @@ def two_plus_two(): Tensor([2])+Tensor([2])
 def two_plus_two_schedule(): (Tensor([2])+Tensor([2])).schedule()
 def two_plus_two_kernel():
   si = (Tensor([2])+Tensor([2])).schedule()[-1]
-  get_kernel(Device.default.renderer, si.ast)
+  get_program(Device.default.renderer, si.ast)
 def two_plus_two_linearize():
   si = (Tensor([2])+Tensor([2])).schedule()[-1]
-  k = get_kernel(Device.default.renderer, si.ast)
-  k.get_optimized_ast()
-  #k.linearize()
+  get_program(Device.default.renderer, si.ast)
 def two_plus_two_realize(): (Tensor([2])+Tensor([2])).realize()
 def two_plus_two_item(): (Tensor([2])+Tensor([2])).item()
 def gradient_test():
@@ -36,7 +34,7 @@ def kernel_matmul():
   y = Tensor([[2.0,0,-2.0]], requires_grad=True)
   z = y.matmul(x)
   si = z.schedule()[-1]
-  get_kernel(Device.default.renderer, si.ast)
+  get_program(Device.default.renderer, si.ast)
 def realized_matmul():
   x = Tensor.eye(3, requires_grad=True)
   y = Tensor([[2.0,0,-2.0]], requires_grad=True)
diff --git a/test/test_uops.py b/test/test_uops.py
index cff71ff0c4..2f5ca3ea49 100644
--- a/test/test_uops.py
+++ b/test/test_uops.py
@@ -4,14 +4,14 @@ import numpy as np
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.view import View # noqa F401
 from tinygrad.tensor import Tensor, _to_np_dtype
-from tinygrad.helpers import CI, DEBUG, getenv, Context, Timing
+from tinygrad.helpers import CI, DEBUG, getenv, Timing
 from tinygrad.dtype import dtypes, DType
 from tinygrad.device import Buffer, Device
 from tinygrad.uop.ops import Ops, UOp, UPat, KernelInfo, exec_alu # noqa F401
 from tinygrad.uop.spec import spec
 from tinygrad.renderer import ProgramSpec
 from tinygrad.engine.grouper import fix_kernel_ops
-from tinygrad.engine.realize import CompiledRunner, get_kernel
+from tinygrad.engine.realize import CompiledRunner
 from tinygrad.codegen import full_rewrite
 from tinygrad.uop.symbolic import sym
 from tinygrad.device import is_dtype_supported
@@ -461,13 +461,6 @@ class TestUOpStr(unittest.TestCase):
     assert len(str(a)) < 10_000, "exponential string growth"
     assert str(eval(str(a))) == str(a)
 
-    t = Tensor.arange(10)
-    t = t + t * Tensor.rand(10)
-    # nice big complicated uop
-    with Context(NOOPT=1):
-      sink = UOp(Ops.SINK, dtypes.void, (get_kernel(Device[Device.DEFAULT].renderer, t.schedule()[-1].ast).linearize().uops[-1],))
-    self.assertEqual(sink, eval(str(sink)))
-
   def test_vectorized_str(self):
     vec = UOp(Ops.VECTORIZE, dtypes.int.vec(4), tuple(UOp.const(dtypes.int, x) for x in range(4)))
     assert str(eval(str(vec))) == str(vec)
diff --git a/tinygrad/engine/realize.py b/tinygrad/engine/realize.py
index fc4352d11e..db0b0fc7fe 100644
--- a/tinygrad/engine/realize.py
+++ b/tinygrad/engine/realize.py
@@ -13,7 +13,7 @@ from tinygrad.engine.schedule import ScheduleItem
 # **************** Program Creation ****************
 
 logkerns, logkerns_level = open(getenv("LOGKERNS", ""), "a") if getenv("LOGKERNS", "") else None, getenv("LOGKERNS_LEVEL", 1)
-def get_kernel(renderer:Renderer, ast:UOp) -> Kernel:
+def get_program(renderer:Renderer, ast:UOp) -> ProgramSpec:
   k = Kernel(ast, opts=renderer)
   if not NOOPT:
     if not k.apply_tensor_cores(getenv("TC", 1)): k.apply_opts(hand_coded_optimizations(k))
@@ -23,7 +23,7 @@ def get_kernel(renderer:Renderer, ast:UOp) -> Kernel:
       rawbufs = bufs_from_lin(kb, allocate=False)
       k = beam_search(kb, rawbufs, BEAM.value, bool(getenv("BEAM_ESTIMATE", 1)))
   if logkerns is not None: logkerns.writelines([f"{(k.ast, k.applied_opts)}\n"])
-  return k
+  return k.to_program()
 
 # **************** Runners ****************
 
@@ -109,7 +109,7 @@ def get_runner(device:str, ast:UOp) -> CompiledRunner:
   if bret:=method_cache.get(bkey):
     method_cache[ckey] = ret = CompiledRunner(replace(bret.p, device=device), bret.lib)
   else:
-    prg: ProgramSpec = get_kernel(Device[device].renderer, ast).to_program()
+    prg: ProgramSpec = get_program(Device[device].renderer, ast)
     method_cache[ckey] = method_cache[bkey] = ret = CompiledRunner(replace(prg, device=device))
   return ret
 
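
For context, here is a minimal sketch (not part of the patch) of the renamed call path end to end. It only uses identifiers that appear in the hunks above, and assumes a stock tinygrad checkout where Device.DEFAULT resolves to a compiled backend:

    from tinygrad import Tensor, Device
    from tinygrad.engine.realize import get_program, CompiledRunner

    # schedule a trivial addition and take the AST of its last kernel
    si = (Tensor([2]) + Tensor([2])).schedule()[-1]

    # get_program returns a ProgramSpec directly; the old flow was
    # get_kernel(...).to_program() (or .linearize() to inspect the uops)
    program = get_program(Device[Device.DEFAULT].renderer, si.ast)

    # a ProgramSpec is what CompiledRunner consumes
    fxn = CompiledRunner(program)
    print(fxn.p.src)  # rendered kernel source, as printed in docs/abstractions2.py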