proposal: MetaOps.EXT (#6054)

`MetaOps.CUSTOM`, `MetaOps.COPY`, `MetaOps.EMPTY`, and `MetaOps.VIEW` don't fit into any of our existing UOps.

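By wrapping those four ops in `MetaOps.EXT` (with the original op and its arg carried as `(op, arg)`), `MetaOps.KERNEL` and `MetaOps.EXT` can be the two paths in realize.py.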

After the AST becomes a UOp, the mapping is:
MetaOps.KERNEL -> UOps.SINK
MetaOps.EXT -> UOps.EXT
This commit is contained in:
qazal
2024-08-13 05:29:29 +08:00
committed by GitHub
parent 059cf2a90d
commit 8c501272f3
3 changed files with 9 additions and 9 deletions

View File

@@ -187,19 +187,19 @@ class ExecItem:
return et
def lower_schedule_item(si:ScheduleItem) -> ExecItem:
assert len(set(x.device for x in si.bufs)) == 1 or si.ast.op is MetaOps.COPY or getenv("USE_COPY_KERNEL")
assert len(set(x.device for x in si.bufs)) == 1 or (si.ast.op is MetaOps.EXT and si.ast.arg[0] is MetaOps.COPY) or getenv("USE_COPY_KERNEL")
if si.ast.op is MetaOps.KERNEL:
runner = get_runner(si.outputs[0].device, si.ast)
return ExecItem(runner, [si.bufs[x] for x in runner.p.globals], si.metadata)
out = si.outputs[0]
if si.ast.op is MetaOps.COPY:
out, (op, arg) = si.outputs[0], si.ast.arg
if op is MetaOps.COPY:
kernel_type = BufferCopy
if hasattr(Device[out.device].allocator, 'transfer') and out.device.split(":")[0] == si.inputs[0].device.split(":")[0]:
kernel_type = BufferXfer
return ExecItem(kernel_type(si.ast.arg, out.device, si.inputs[0].device), list(si.bufs))
if si.ast.op is MetaOps.CUSTOM: return ExecItem(CustomOp(si.ast.arg), list(si.bufs))
if si.ast.op is MetaOps.EMPTY: return ExecItem(EmptyOp(out), list(si.bufs))
if si.ast.op is MetaOps.VIEW: return ExecItem(ViewOp(out), list(si.bufs))
return ExecItem(kernel_type(arg, out.device, si.inputs[0].device), list(si.bufs))
if op is MetaOps.CUSTOM: return ExecItem(CustomOp(arg), list(si.bufs))
if op is MetaOps.EMPTY: return ExecItem(EmptyOp(out), list(si.bufs))
if op is MetaOps.VIEW: return ExecItem(ViewOp(out), list(si.bufs))
raise RuntimeError(f"don't know how to lower {si.ast}")
def lower_schedule(schedule:List[ScheduleItem]) -> Generator[ExecItem, None, None]:

View File

@@ -147,7 +147,7 @@ def _lower_lazybuffer(outs:List[LazyBuffer], realizes:Dict[LazyBuffer, None]) ->
wr = LazyOp(BufferOps.STORE, (rd,), MemBuffer(0, dtypes.uint8, st))
return LBScheduleItem(LazyOp(MetaOps.KERNEL, (wr,)), outs, [x.base for x in out.srcs])
if out.op in {MetaOps.CUSTOM, MetaOps.COPY, MetaOps.EMPTY, MetaOps.VIEW}:
return LBScheduleItem(LazyOp(out.op, (), out.arg), outs, [x.base for x in out.srcs])
return LBScheduleItem(LazyOp(MetaOps.EXT, (), (out.op, out.arg)), outs, [x.base for x in out.srcs])
# push through all movementops between reduceops
reduce_info: Dict[Tuple[LazyBuffer, ShapeTracker], Tuple[ShapeTracker, Tuple[int, ...]]] = {}
seen_ops: Dict[Tuple[LazyBuffer, ShapeTracker], Optional[Tuple[LazyBuffer, ShapeTracker]]] = {}

View File

@@ -27,7 +27,7 @@ class ReduceOps(Enum):
SUM = auto(); MAX = auto(); WMMA = auto() # noqa: E702
class BufferOps(Enum): LOAD = auto(); CONST = auto(); STORE = auto() # noqa: E702
class MetaOps(Enum):
EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto(); ASSIGN = auto(); VIEW = auto(); KERNEL = auto() # noqa: E702
EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto(); ASSIGN = auto(); VIEW = auto(); KERNEL = auto(); EXT = auto() # noqa: E702
Op = Union[UnaryOps, BinaryOps, ReduceOps, MetaOps, TernaryOps, BufferOps]
# do not preserve f(0) = 0