mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-25 06:48:22 -05:00
proposal: MetaOps.EXT (#6054)
`MetaOps.CUSTOM`, `MetaOps.COPY`, `MetaOps.EMPTY`, and `MetaOps.VIEW` don't fit into any of our existing UOps. `MetaOps.KERNEL` and `MetaOps.EXT` can be the two paths in realize.py once the AST is a UOp: `MetaOps.KERNEL` -> `UOps.SINK`, and `MetaOps.EXT` -> `UOps.EXT`.
This commit is contained in:
@@ -187,19 +187,19 @@ class ExecItem:
|
||||
return et
|
||||
|
||||
def lower_schedule_item(si:ScheduleItem) -> ExecItem:
|
||||
assert len(set(x.device for x in si.bufs)) == 1 or si.ast.op is MetaOps.COPY or getenv("USE_COPY_KERNEL")
|
||||
assert len(set(x.device for x in si.bufs)) == 1 or (si.ast.op is MetaOps.EXT and si.ast.arg[0] is MetaOps.COPY) or getenv("USE_COPY_KERNEL")
|
||||
if si.ast.op is MetaOps.KERNEL:
|
||||
runner = get_runner(si.outputs[0].device, si.ast)
|
||||
return ExecItem(runner, [si.bufs[x] for x in runner.p.globals], si.metadata)
|
||||
out = si.outputs[0]
|
||||
if si.ast.op is MetaOps.COPY:
|
||||
out, (op, arg) = si.outputs[0], si.ast.arg
|
||||
if op is MetaOps.COPY:
|
||||
kernel_type = BufferCopy
|
||||
if hasattr(Device[out.device].allocator, 'transfer') and out.device.split(":")[0] == si.inputs[0].device.split(":")[0]:
|
||||
kernel_type = BufferXfer
|
||||
return ExecItem(kernel_type(si.ast.arg, out.device, si.inputs[0].device), list(si.bufs))
|
||||
if si.ast.op is MetaOps.CUSTOM: return ExecItem(CustomOp(si.ast.arg), list(si.bufs))
|
||||
if si.ast.op is MetaOps.EMPTY: return ExecItem(EmptyOp(out), list(si.bufs))
|
||||
if si.ast.op is MetaOps.VIEW: return ExecItem(ViewOp(out), list(si.bufs))
|
||||
return ExecItem(kernel_type(arg, out.device, si.inputs[0].device), list(si.bufs))
|
||||
if op is MetaOps.CUSTOM: return ExecItem(CustomOp(arg), list(si.bufs))
|
||||
if op is MetaOps.EMPTY: return ExecItem(EmptyOp(out), list(si.bufs))
|
||||
if op is MetaOps.VIEW: return ExecItem(ViewOp(out), list(si.bufs))
|
||||
raise RuntimeError(f"don't know how to lower {si.ast}")
|
||||
|
||||
def lower_schedule(schedule:List[ScheduleItem]) -> Generator[ExecItem, None, None]:
|
||||
|
||||
@@ -147,7 +147,7 @@ def _lower_lazybuffer(outs:List[LazyBuffer], realizes:Dict[LazyBuffer, None]) ->
|
||||
wr = LazyOp(BufferOps.STORE, (rd,), MemBuffer(0, dtypes.uint8, st))
|
||||
return LBScheduleItem(LazyOp(MetaOps.KERNEL, (wr,)), outs, [x.base for x in out.srcs])
|
||||
if out.op in {MetaOps.CUSTOM, MetaOps.COPY, MetaOps.EMPTY, MetaOps.VIEW}:
|
||||
return LBScheduleItem(LazyOp(out.op, (), out.arg), outs, [x.base for x in out.srcs])
|
||||
return LBScheduleItem(LazyOp(MetaOps.EXT, (), (out.op, out.arg)), outs, [x.base for x in out.srcs])
|
||||
# push through all movementops between reduceops
|
||||
reduce_info: Dict[Tuple[LazyBuffer, ShapeTracker], Tuple[ShapeTracker, Tuple[int, ...]]] = {}
|
||||
seen_ops: Dict[Tuple[LazyBuffer, ShapeTracker], Optional[Tuple[LazyBuffer, ShapeTracker]]] = {}
|
||||
|
||||
@@ -27,7 +27,7 @@ class ReduceOps(Enum):
|
||||
SUM = auto(); MAX = auto(); WMMA = auto() # noqa: E702
|
||||
class BufferOps(Enum): LOAD = auto(); CONST = auto(); STORE = auto() # noqa: E702
|
||||
class MetaOps(Enum):
|
||||
EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto(); ASSIGN = auto(); VIEW = auto(); KERNEL = auto() # noqa: E702
|
||||
EMPTY = auto(); CONST = auto(); COPY = auto(); CONTIGUOUS = auto(); CUSTOM = auto(); ASSIGN = auto(); VIEW = auto(); KERNEL = auto(); EXT = auto() # noqa: E702
|
||||
Op = Union[UnaryOps, BinaryOps, ReduceOps, MetaOps, TernaryOps, BufferOps]
|
||||
|
||||
# do not preserve f(0) = 0
|
||||
|
||||
Reference in New Issue
Block a user