From 8c501272f35da5f53bc4b852ebaa97b37e526c03 Mon Sep 17 00:00:00 2001 From: qazal <77887910+Qazalin@users.noreply.github.com> Date: Tue, 13 Aug 2024 05:29:29 +0800 Subject: [PATCH] proposal: MetaOps.EXT (#6054) `MetaOps.CUSTOM, MetaOps.COPY, MetaOps.EMPTY, MetaOps.VIEW` don't fit into any of our existing UOps. MetaOps.KERNEL and MetaOps.EXT can be the two paths in realize.py after AST is UOp: MetaOps.KERNEL -> UOps.SINK MetaOps.EXT -> UOps.EXT --- tinygrad/engine/realize.py | 14 +++++++------- tinygrad/engine/schedule.py | 2 +- tinygrad/ops.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tinygrad/engine/realize.py b/tinygrad/engine/realize.py index 8d933a88a3..c26bdc284a 100644 --- a/tinygrad/engine/realize.py +++ b/tinygrad/engine/realize.py @@ -187,19 +187,19 @@ class ExecItem: return et def lower_schedule_item(si:ScheduleItem) -> ExecItem: - assert len(set(x.device for x in si.bufs)) == 1 or si.ast.op is MetaOps.COPY or getenv("USE_COPY_KERNEL") + assert len(set(x.device for x in si.bufs)) == 1 or (si.ast.op is MetaOps.EXT and si.ast.arg[0] is MetaOps.COPY) or getenv("USE_COPY_KERNEL") if si.ast.op is MetaOps.KERNEL: runner = get_runner(si.outputs[0].device, si.ast) return ExecItem(runner, [si.bufs[x] for x in runner.p.globals], si.metadata) - out = si.outputs[0] - if si.ast.op is MetaOps.COPY: + out, (op, arg) = si.outputs[0], si.ast.arg + if op is MetaOps.COPY: kernel_type = BufferCopy if hasattr(Device[out.device].allocator, 'transfer') and out.device.split(":")[0] == si.inputs[0].device.split(":")[0]: kernel_type = BufferXfer - return ExecItem(kernel_type(si.ast.arg, out.device, si.inputs[0].device), list(si.bufs)) - if si.ast.op is MetaOps.CUSTOM: return ExecItem(CustomOp(si.ast.arg), list(si.bufs)) - if si.ast.op is MetaOps.EMPTY: return ExecItem(EmptyOp(out), list(si.bufs)) - if si.ast.op is MetaOps.VIEW: return ExecItem(ViewOp(out), list(si.bufs)) + return ExecItem(kernel_type(arg, out.device, si.inputs[0].device), list(si.bufs)) + if op is MetaOps.CUSTOM: return ExecItem(CustomOp(arg), list(si.bufs)) + if op is MetaOps.EMPTY: return ExecItem(EmptyOp(out), list(si.bufs)) + if op is MetaOps.VIEW: return ExecItem(ViewOp(out), list(si.bufs)) raise RuntimeError(f"don't know how to lower {si.ast}") def lower_schedule(schedule:List[ScheduleItem]) -> Generator[ExecItem, None, None]: diff --git a/tinygrad/engine/schedule.py b/tinygrad/engine/schedule.py index 266e839a17..005362cd48 100644 --- a/tinygrad/engine/schedule.py +++ b/tinygrad/engine/schedule.py @@ -147,7 +147,7 @@ def _lower_lazybuffer(outs:List[LazyBuffer], realizes:Dict[LazyBuffer, None]) -> wr = LazyOp(BufferOps.STORE, (rd,), MemBuffer(0, dtypes.uint8, st)) return LBScheduleItem(LazyOp(MetaOps.KERNEL, (wr,)), outs, [x.base for x in out.srcs]) if out.op in {MetaOps.CUSTOM, MetaOps.COPY, MetaOps.EMPTY, MetaOps.VIEW}: - return LBScheduleItem(LazyOp(out.op, (), out.arg), outs, [x.base for x in out.srcs]) + return LBScheduleItem(LazyOp(MetaOps.EXT, (), (out.op, out.arg)), outs, [x.base for x in out.srcs]) # push through all movementops between reduceops reduce_info: Dict[Tuple[LazyBuffer, ShapeTracker], Tuple[ShapeTracker, Tuple[int, ...]]] = {} seen_ops: Dict[Tuple[LazyBuffer, ShapeTracker], Optional[Tuple[LazyBuffer, ShapeTracker]]] = {} diff --git a/tinygrad/ops.py b/tinygrad/ops.py index 41f7e77402..b70eb14314 100644 --- a/tinygrad/ops.py +++ b/tinygrad/ops.py @@ -27,7 +27,7 @@ class ReduceOps(Enum): SUM = auto(); MAX = auto(); WMMA = auto() # noqa: E702 class BufferOps(Enum): LOAD = auto(); CONST = auto(); STORE = auto() # noqa: E702 class MetaOps(Enum): - EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto(); ASSIGN = auto(); VIEW = auto(); KERNEL = auto() # noqa: E702 + EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto(); ASSIGN = auto(); VIEW = auto(); KERNEL = auto(); EXT = auto() # noqa: E702 Op = Union[UnaryOps, BinaryOps, ReduceOps, MetaOps, TernaryOps, BufferOps] # do not preserve f(0) = 0