diff --git a/docs/abstractions2.py b/docs/abstractions2.py index 495d15b0c5..a7834f74ab 100644 --- a/docs/abstractions2.py +++ b/docs/abstractions2.py @@ -79,8 +79,8 @@ from tinygrad.engine.realize import run_schedule from tinygrad.engine.schedule import create_schedule # allocate some values + load in values -a = LazyBuffer.loadop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE) -b = LazyBuffer.loadop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE) +a = LazyBuffer.metaop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE) +b = LazyBuffer.metaop(MetaOps.EMPTY, (1,), dtypes.int32, DEVICE) a.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 2)))) b.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 3)))) del a.srcs diff --git a/examples/openpilot/compile2.py b/examples/openpilot/compile2.py index 6204e4a88e..bb2122fb5d 100644 --- a/examples/openpilot/compile2.py +++ b/examples/openpilot/compile2.py @@ -48,7 +48,7 @@ def get_schedule(onnx_data) -> Tuple[List[ScheduleItem], List[ScheduleItem]]: schedule, schedule_independent = partition(schedule, lambda si: any(out in depends for out in si.outputs)) print(f"{len(schedule)} schedule items depend on the input, {len(schedule_independent)} don't") - # confirm no loadops in the (non independent) schedule except for the ones that load the input buffers + # confirm no non-sink metaop in the (non independent) schedule except for the ones that load the input buffers assert all(si.ast.op is MetaOps.SINK or out in input_lb for si in schedule for out in si.outputs), "has non SINK ops, can't compile to Thneed" return schedule, schedule_independent, inputs diff --git a/test/test_schedule.py b/test/test_schedule.py index 18e1d0954d..11c78ab1ff 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -19,7 +19,7 @@ from tinygrad.function import Function from tinygrad.lazy import LazyBuffer, view_supported_devices class KernelCountException(Exception): pass -def check_schedule(t:Union[Tensor, List[Tensor]], allowed:int, to_prerealize:Optional[List[Tensor]]=None, filter_loadops=True): +def check_schedule(t:Union[Tensor, List[Tensor]], allowed:int, to_prerealize:Optional[List[Tensor]]=None, filter_sink=True): if isinstance(t, Tensor): t = [t] seen = set() if to_prerealize: @@ -28,14 +28,14 @@ def check_schedule(t:Union[Tensor, List[Tensor]], allowed:int, to_prerealize:Opt for i,out in enumerate(s.outputs): seen.add(out) sched = create_schedule(flatten([r.lazydata.lbs for r in t]), seen) - if filter_loadops: sched = [s for s in sched if s.ast.op is MetaOps.SINK] + if filter_sink: sched = [s for s in sched if s.ast.op is MetaOps.SINK] if len(sched) != allowed: print(f"SCHEDULE ISSUE, expecting {allowed} got {len(sched)}") if len(sched) != allowed or DEBUG >= 3: for i, s in enumerate(sched): print("kernel", i+1) print_tree(s.ast) if len(sched) != allowed: raise KernelCountException(f"{len(sched)=} != {allowed}") - # test the (non loadops) ops linearize + # test the (sink) ops linearize for s in sched: if s.ast.op is not MetaOps.SINK: continue l = Kernel(s.ast) @@ -87,7 +87,7 @@ class TestSchedule(unittest.TestCase): def test_constants_are_embedded(self): a = Tensor.empty(3,3) * 2 - check_schedule(a, 2, filter_loadops=False) + check_schedule(a, 2, filter_sink=False) def test_binop_elu_fusion(self): a = Tensor.empty(10) @@ -433,12 +433,12 @@ class TestSchedule(unittest.TestCase): def test_contiguous_while_contiguous(self): x = Tensor.empty(1, 64, 32, 32) out = x.contiguous() - check_schedule(out, 1, filter_loadops=False) + check_schedule(out, 1, filter_sink=False) def test_contiguous_while_not_contiguous(self): x = Tensor.empty(1, 64, 32, 32) out = x.permute(0,2,3,1).contiguous() - check_schedule(out, 2, filter_loadops=False) + check_schedule(out, 2, filter_sink=False) def test_fold_with_contiguous(self): a = Tensor.randn(16, 16, 16).realize() @@ -449,7 +449,7 @@ class TestSchedule(unittest.TestCase): def test_double_from(self): x = Tensor([1,2,3,4]) out = x.to('npy') - check_schedule(out, 0, filter_loadops=False) + check_schedule(out, 0, filter_sink=False) def test_pow_const_tensor_simplified(self): x = Tensor([1,2,3,4]) @@ -466,7 +466,7 @@ class TestSchedule(unittest.TestCase): def test_zero_size(self): x = Tensor.empty(2, 3, 0) out = x + 1 - check_schedule(out, 0, filter_loadops=False) + check_schedule(out, 0, filter_sink=False) def test_reduce_permute_nofuse(self): x = Tensor.empty(32, 32, 32) @@ -552,7 +552,7 @@ class TestSchedule(unittest.TestCase): x = Tensor(2) + Tensor(2) y = Tensor(2) + Tensor(2) out = x.contiguous() + y.contiguous() - with self.assertRaises(KernelCountException): check_schedule(out, 2, filter_loadops=False) + with self.assertRaises(KernelCountException): check_schedule(out, 2, filter_sink=False) # multireduce spec def test_reduce_same_size(self): diff --git a/tinygrad/lazy.py b/tinygrad/lazy.py index 5e5510dd56..a3bded15b2 100644 --- a/tinygrad/lazy.py +++ b/tinygrad/lazy.py @@ -66,20 +66,20 @@ class LazyBuffer: def lbs(self) -> List[LazyBuffer]: return [self] @staticmethod - def loadop(op, shape:Tuple[sint,...], dtype:DType, device:str, arg=None, src:Tuple[LazyBuffer, ...]=(), enable_cache=False) -> LazyBuffer: + def metaop(op, shape:Tuple[sint,...], dtype:DType, device:str, arg=None, src:Tuple[LazyBuffer, ...]=(), enable_cache=False) -> LazyBuffer: assert isinstance(src, tuple) return create_lazybuffer(device, ShapeTracker.from_shape(shape), dtype, op, arg, src, enable_cache=enable_cache) def const(self, val:ConstType, shape:Optional[Tuple[sint,...]]=None) -> LazyBuffer: assert isinstance(val, (int,float,bool)), f"{val=} has {type(val)=}, not a ConstType" shape = self.shape if shape is None else shape - return LazyBuffer.loadop(MetaOps.CONST, tuple(), self.dtype, self.device, arg=val).reshape((1,)*len(shape)).expand(shape) + return LazyBuffer.metaop(MetaOps.CONST, tuple(), self.dtype, self.device, arg=val).reshape((1,)*len(shape)).expand(shape) def is_realized(self) -> bool: return self.base.realized is not None def assign(self, x:LazyBuffer) -> LazyBuffer: assert x.size == self.size, f"assign target must have same size {self.size=} != {x.size=}" - return LazyBuffer.loadop(MetaOps.ASSIGN, self.shape, self.dtype, self.device, arg=() if self.st.contiguous else (self.st,), src=(x, self.base)) + return LazyBuffer.metaop(MetaOps.ASSIGN, self.shape, self.dtype, self.device, arg=() if self.st.contiguous else (self.st,), src=(x, self.base)) def can_view(self): return self.st.consecutive and not self.is_unrealized_const() and self.device.split(":")[0] in view_supported_devices @@ -125,7 +125,7 @@ class LazyBuffer: # const doesn't have to be copied (issues with disk tensor) if self.is_unrealized_const(): - return LazyBuffer.loadop(MetaOps.CONST, tuple(), self.dtype, device, arg=self.base.arg)._view(self.st) + return LazyBuffer.metaop(MetaOps.CONST, tuple(), self.dtype, device, arg=self.base.arg)._view(self.st) # if it's a shrink, do the shrink before the copy with CONTIGUOUS if prod(self.st.shape) < prod(self.base.st.shape): return self.contiguous()._copy(device) diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index 01162c8103..1c7ab54148 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -41,24 +41,24 @@ class Function: import tinygrad.function as F -def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str, ...]], arg=None, src:Tuple[LazyBuffer, ...]=()): - if isinstance(device, str): return LazyBuffer.loadop(op, shape, dtype, device, arg, src) - return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None) +def _metaop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str, ...]], arg=None, src:Tuple[LazyBuffer, ...]=()): + if isinstance(device, str): return LazyBuffer.metaop(op, shape, dtype, device, arg, src) + return MultiLazyBuffer([LazyBuffer.metaop(op, shape, dtype, d, arg, src) for d in device], None) def _from_np_dtype(npdtype:type) -> DType: return dtypes.fields()[np.dtype(npdtype).name] def _to_np_dtype(dtype:DType) -> Optional[type]: return np.dtype(dtype.fmt).type if dtype.fmt is not None else None def _fromnp(x: np.ndarray) -> LazyBuffer: - ret = LazyBuffer.loadop(MetaOps.EMPTY, x.shape, _from_np_dtype(x.dtype), "NPY") + ret = LazyBuffer.metaop(MetaOps.EMPTY, x.shape, _from_np_dtype(x.dtype), "NPY") # fake realize ret.buffer.allocate(x) del ret.srcs return ret def _frompy(x:Union[List, Tuple, bytes], dtype:DType) -> LazyBuffer: - if isinstance(x, bytes): ret, data = LazyBuffer.loadop(MetaOps.EMPTY, (len(x),), dtype, "PYTHON"), x + if isinstance(x, bytes): ret, data = LazyBuffer.metaop(MetaOps.EMPTY, (len(x),), dtype, "PYTHON"), x else: - ret = LazyBuffer.loadop(MetaOps.EMPTY, get_shape(x), dtype, "PYTHON") + ret = LazyBuffer.metaop(MetaOps.EMPTY, get_shape(x), dtype, "PYTHON") assert dtype.fmt is not None, f"{dtype=} has None fmt" truncate_function = truncate[dtype] data = struct.pack(f"@{ret.size}{dtype.fmt}", *[truncate_function(xi) for xi in fully_flatten(x)]) @@ -122,8 +122,8 @@ class Tensor: # create a LazyBuffer from the different types of inputs if isinstance(data, LazyBuffer): assert dtype is None or dtype == data.dtype, "dtype doesn't match, and casting isn't supported" - elif isinstance(data, get_args(ConstType)): data = _loadop(MetaOps.CONST, tuple(), dtype or dtypes.from_py(data), device, data) - elif isinstance(data, Variable): data = _loadop(MetaOps.CONST, tuple(), dtype or dtypes.from_py(data.unbind()[1]), device, data) + elif isinstance(data, get_args(ConstType)): data = _metaop(MetaOps.CONST, tuple(), dtype or dtypes.from_py(data), device, data) + elif isinstance(data, Variable): data = _metaop(MetaOps.CONST, tuple(), dtype or dtypes.from_py(data.unbind()[1]), device, data) elif isinstance(data, bytes): data = _frompy(data, dtypes.uint8) elif isinstance(data, (list, tuple)): if dtype is None: @@ -131,9 +131,9 @@ class Tensor: else: dtype = dtypes.default_int if d and all_int(d) else dtypes.default_float if dtype == dtypes.bfloat16: data = Tensor(_fromnp(np.array(data, np.float32)), device=device).cast(dtypes.bfloat16).lazydata else: data = _fromnp(np.array(data).astype(_to_np_dtype(dtype))) - elif data is None: data = _loadop(MetaOps.EMPTY, (0,), dtype or dtypes.default_float, device) + elif data is None: data = _metaop(MetaOps.EMPTY, (0,), dtype or dtypes.default_float, device) elif isinstance(data, np.ndarray): - if data.shape == (): data = _loadop(MetaOps.CONST, tuple(), dtype or _from_np_dtype(data.dtype), device, data.item()) + if data.shape == (): data = _metaop(MetaOps.CONST, tuple(), dtype or _from_np_dtype(data.dtype), device, data.item()) else: data = _fromnp(data.astype(npdtype) if dtype is not None and (npdtype:=_to_np_dtype(dtype)) is not None else data) # by this point, it has to be a LazyBuffer @@ -344,14 +344,14 @@ class Tensor: if isinstance(y, SumNode): return Tensor.from_node(y.nodes[0], **kwargs) + sum(y.nodes[1:]) raise RuntimeError(f"unhandled Node {y}") - # ***** creation llop entrypoint ***** + # ***** creation entrypoint ***** @staticmethod - def _loadop(op, shape, device:Optional[Union[Tuple[str, ...], str]]=None, dtype:Optional[DType]=None, arg=None, **kwargs): + def _metaop(op, shape, device:Optional[Union[Tuple[str, ...], str]]=None, dtype:Optional[DType]=None, arg=None, **kwargs): if isinstance(device, tuple): - return Tensor(MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype or dtypes.default_float, Device.canonicalize(d), arg) \ + return Tensor(MultiLazyBuffer([LazyBuffer.metaop(op, shape, dtype or dtypes.default_float, Device.canonicalize(d), arg) \ for d in device], None), device, dtype, **kwargs) - return Tensor(LazyBuffer.loadop(op, shape, dtype or dtypes.default_float, Device.canonicalize(device), arg), device, dtype, **kwargs) + return Tensor(LazyBuffer.metaop(op, shape, dtype or dtypes.default_float, Device.canonicalize(device), arg), device, dtype, **kwargs) @staticmethod def empty(*shape, **kwargs): @@ -366,7 +366,7 @@ class Tensor: print(t.shape) ``` """ - return Tensor._loadop(MetaOps.EMPTY, argfix(*shape), **kwargs) + return Tensor._metaop(MetaOps.EMPTY, argfix(*shape), **kwargs) _seed: int = int(time.time()) _rng_counter: Optional[Tensor] = None @@ -407,7 +407,7 @@ class Tensor: # for bfloat16, numpy rand passes buffer in float if (dtype or dtypes.default_float) == dtypes.bfloat16: return Tensor.rand(*shape, **kwargs, device=device, dtype=dtypes.float).cast(dtypes.bfloat16) - return Tensor._loadop(MetaOps.CUSTOM, argfix(*shape), arg=custom_random, device=device, dtype=dtype, **kwargs) + return Tensor._metaop(MetaOps.CUSTOM, argfix(*shape), arg=custom_random, device=device, dtype=dtype, **kwargs) # threefry if (num := prod((shape:=argfix(*shape)))) == 0: return Tensor.zeros(shape, device=device, dtype=dtype, **kwargs)