From 6356474d6d171682a62e6a533f057cd473948fe9 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Thu, 15 Feb 2024 12:16:10 +0100 Subject: [PATCH] Revert "ops_ext to replace cpu import (#3406)" (#3408) This reverts commit 91eb93f85a78cd12ea469eaec80d728845881b42. --- docs/abstractions.py | 4 ++-- test/external/external_test_example.py | 2 +- test/test_schedule.py | 2 +- tinygrad/codegen/kernel.py | 2 +- tinygrad/device.py | 5 +---- tinygrad/features/search.py | 2 +- tinygrad/runtime/ops_ext.py | 12 ------------ tinygrad/runtime/ops_python.py | 2 +- tinygrad/tensor.py | 9 +++------ 9 files changed, 11 insertions(+), 29 deletions(-) delete mode 100644 tinygrad/runtime/ops_ext.py diff --git a/docs/abstractions.py b/docs/abstractions.py index f326b733da..8f8ebeb123 100644 --- a/docs/abstractions.py +++ b/docs/abstractions.py @@ -135,8 +135,8 @@ assert len(lazyop.srcs) == 2 # the source is a LazyBuffer that is a "CPU" Tensor # again, a LazyOp AST is like a GPU kernel. you have to copy the data on the device first assert lazyop.srcs[0].op == LoadOps.COPY -assert lazyop.srcs[0].srcs[0].device == "EXT" -assert lazyop.srcs[0].srcs[0].realized._buf[0][0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2]" +assert lazyop.srcs[0].srcs[0].device == "CPU" +assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2]" assert result.lazydata.base.realized is None, "the LazyBuffer is not realized yet" # now we realize the LazyBuffer diff --git a/test/external/external_test_example.py b/test/external/external_test_example.py index fd369618aa..c1b101933e 100644 --- a/test/external/external_test_example.py +++ b/test/external/external_test_example.py @@ -7,7 +7,7 @@ def multidevice_test(fxn): exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",") def ret(self): for device in Device._devices: - if device in ["DISK", "EXT", "FAKE"]: continue + if device in ["DISK", "FAKE"]: continue if not CI: print(device) if device in exclude_devices: if not CI: print(f"WARNING: {device} test is excluded") diff --git a/test/test_schedule.py b/test/test_schedule.py index 733cf0f3c5..3ba137fdbd 100644 --- a/test/test_schedule.py +++ b/test/test_schedule.py @@ -350,7 +350,7 @@ class TestSchedule(unittest.TestCase): def test_double_from(self): x = Tensor([1,2,3,4]) - out = x.to('ext') + out = x.to('cpu') check_schedule(out, 0, filter_loadops=False) def test_pow_const_tensor_simplified(self): diff --git a/tinygrad/codegen/kernel.py b/tinygrad/codegen/kernel.py index ab3bf1dbd4..cb111642ca 100644 --- a/tinygrad/codegen/kernel.py +++ b/tinygrad/codegen/kernel.py @@ -68,7 +68,7 @@ class LinearizerOptions(NamedTuple): class Kernel: def __init__(self, ast:LazyOp, opts:Optional[LinearizerOptions]=None): - self.opts = opts or (device.compiler.linearizer_opts if isinstance(device:=Device[Device.DEFAULT], Compiled) and device.compiler is not None else + self.opts = opts or (device.compiler.linearizer_opts if isinstance(device:=Device[Device.DEFAULT], Compiled) else LinearizerOptions(Device.DEFAULT)) self.ast = ast assert ast.op == BufferOps.STORE, f"kernels must have a store as the output, got {ast.op}" diff --git a/tinygrad/device.py b/tinygrad/device.py index 3eaecd7f7d..bbe20fb171 100644 --- a/tinygrad/device.py +++ b/tinygrad/device.py @@ -281,7 +281,6 @@ class CompiledASTRunner(JITRunner): if local_size is not None: local_size = local_size + [1]*(3-len(local_size)) self.name, self.display_name, self.prg, self.device, self.global_size, self.local_size, self.first_run = \ to_function_name(name), name, prg, device, global_size, local_size, True - assert self.device.compiler is not None, "compiler is reuired to make an AST kernel" lib:bytes = precompiled if precompiled is not None else self.device.compiler.compile_cached(prg) self.lib, self.clprg = lib, self.device.runtime(self.name, lib) self.vars: List[Variable] = [] @@ -313,17 +312,15 @@ class CompiledASTRunner(JITRunner): return et class Compiled: - def __init__(self, device:str, allocator:Allocator, compiler:Optional[Compiler], runtime, graph=None): + def __init__(self, device:str, allocator:Allocator, compiler:Compiler, runtime, graph=None): self.dname, self.allocator, self.compiler, self.runtime, self.graph = device, allocator, compiler, runtime, graph def synchronize(self): pass # override this in your device def to_program(self, k:Linearizer) -> CompiledASTRunner: - assert self.compiler is not None, "compiler is required to run AST" k.linearize() return CompiledASTRunner(k.ast, k.name, self.compiler.render(to_function_name(k.name), k.uops), self, k.global_size, k.local_size) def get_linearizer(self, ast:LazyOp) -> Linearizer: - assert self.compiler is not None, "compiler is required to build AST" if DEBUG >= 3: from tinygrad.features.graph import print_tree print_tree(ast) diff --git a/tinygrad/features/search.py b/tinygrad/features/search.py index 27cdd97de6..dcf8413305 100644 --- a/tinygrad/features/search.py +++ b/tinygrad/features/search.py @@ -153,7 +153,7 @@ def time_linearizer(lin:Linearizer, rawbufs:List[Buffer], allow_test_size=True, if not disable_cache and CACHELEVEL >= 2 and (val:=diskcache_get("time_linearizer", key)) is not None: return min(val) dev = Device[lin.opts.device] - assert isinstance(dev, Compiled) and dev.compiler is not None + assert isinstance(dev, Compiled) var_vals = {k:(k.max+k.min)//2 for k in lin.ast.vars()} lib, global_size, local_size = _compile_linearizer(dev.compiler, lin) diff --git a/tinygrad/runtime/ops_ext.py b/tinygrad/runtime/ops_ext.py deleted file mode 100644 index f4c8fc1fb6..0000000000 --- a/tinygrad/runtime/ops_ext.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Tuple, Any -from tinygrad.device import Compiled, Allocator - -# the Any is an arbitrary object that's kept in scope with the memoryview -class ExtAllocator(Allocator): - # NOTE: this doesn't work with allow_zero_copy, it's read only somehow - #def as_buffer(self, src:Tuple[memoryview, Any]) -> memoryview: return src[0] - def copyin(self, dest:Tuple[memoryview, Any], src:memoryview): dest[0][:] = src - def copyout(self, dest:memoryview, src:Tuple[memoryview, Any]): dest[:] = src[0] - -class ExtDevice(Compiled): - def __init__(self, device:str): super().__init__(device, ExtAllocator(), None, None) diff --git a/tinygrad/runtime/ops_python.py b/tinygrad/runtime/ops_python.py index 0587536e6e..688d611025 100644 --- a/tinygrad/runtime/ops_python.py +++ b/tinygrad/runtime/ops_python.py @@ -93,7 +93,7 @@ class PythonProgram: ul[i] = [pbufs.pop(0).cast(dtype.fmt)] * warp_size elif uop is UOps.DEFINE_LOCAL: assert dtype.fmt is not None - lbuf = memoryview(bytearray(arg[1]*dtype.itemsize)) + lbuf = memoryview(bytearray(arg[1]*dtype.sz)) ul[i] = [lbuf.cast(dtype.fmt)] * warp_size elif uop is UOps.SPECIAL: if arg[1][0] == 'g': diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index ce540ef1ec..67ecf19165 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -7,7 +7,7 @@ from functools import partialmethod, reduce import numpy as np from tinygrad.dtype import DType, dtypes, ImageDType, Scalar, least_upper_float, least_upper_dtype -from tinygrad.helpers import argfix, make_pair, getenv, IMAGE, DEBUG, WINO, flatten, prod, all_int, round_up, merge_dicts, fully_flatten, flat_mv +from tinygrad.helpers import argfix, make_pair, getenv, IMAGE, DEBUG, WINO, flatten, prod, all_int, round_up, merge_dicts, fully_flatten from tinygrad.lazy import LazyBuffer from tinygrad.features.multi import MultiLazyBuffer from tinygrad.ops import LoadOps @@ -42,11 +42,8 @@ def _loadop(op, shape:Tuple[sint,...], dtype:DType, device:Union[str, Tuple[str, return MultiLazyBuffer([LazyBuffer.loadop(op, shape, dtype, d, arg, src) for d in device], None) def _fromcpu(x: np.ndarray) -> LazyBuffer: - ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "EXT") - if x.size == 0: - ret.realized = Buffer("EXT", 0, dtypes.from_np(x.dtype), (memoryview(bytearray()), None)) - else: - ret.realized = Buffer("EXT", prod(x.shape), dtypes.from_np(x.dtype), (flat_mv(np.require(x, requirements='C').data), x)) + ret = LazyBuffer.loadop(LoadOps.EMPTY, x.shape, dtypes.from_np(x.dtype), "CPU") + ret.realized = Buffer("CPU", prod(x.shape), dtypes.from_np(x.dtype), x.flatten()) return ret def _get_winograd_matcols(mat, dims:int, shp:Tuple[sint, ...], device:Union[str, Tuple[str, ...]]) -> List[List[Tensor]]: