From 50927defad2ee1cabcbf09662b499415400cf8c9 Mon Sep 17 00:00:00 2001 From: chenyu Date: Fri, 22 Dec 2023 14:45:13 -0500 Subject: [PATCH] s/lazydata.realized/lazydata.base.realized/g (#2914) * s/lazydata.realized/lazydata.base.realized/g * not that --- docs/abstractions.py | 8 ++++---- examples/webgpu/stable_diffusion/compile.py | 2 +- extra/export_model.py | 4 ++-- openpilot/compile2.py | 2 +- test/external/external_multi_gpu.py | 4 ++-- test/test_dtype.py | 2 +- test/test_image_dtype.py | 18 +++++++++--------- test/test_linearizer.py | 2 +- test/test_zero_copy.py | 2 +- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/abstractions.py b/docs/abstractions.py index 7d185883a8..d56601f822 100644 --- a/docs/abstractions.py +++ b/docs/abstractions.py @@ -136,15 +136,15 @@ assert len(lazyop.srcs) == 2 assert lazyop.srcs[0].op == LoadOps.COPY assert lazyop.srcs[0].srcs[0].device == "CPU" assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2.]" -assert result.lazydata.realized is None, "the LazyBuffer is not realized yet" +assert result.lazydata.base.realized is None, "the LazyBuffer is not realized yet" # now we realize the LazyBuffer result.realize() -assert result.lazydata.realized is not None, "the LazyBuffer is realized!" +assert result.lazydata.base.realized is not None, "the LazyBuffer is realized!" # this brings us nicely to DeviceBuffer, of which the realized ClangBuffer is a subclass -#assert 'RawMallocBuffer' in str(type(result.lazydata.realized)) +#assert 'RawMallocBuffer' in str(type(result.lazydata.base.realized)) # getting ahead of ourselves, but we can copy the DeviceBuffer toCPU -assert result.lazydata.realized.toCPU()[0] == 5, "when put in numpy with toCPU, it's 5" +assert result.lazydata.base.realized.toCPU()[0] == 5, "when put in numpy with toCPU, it's 5" # %% # == Union[Interpreted, Compiled] (in tinygrad/ops.py, code 5/10) == diff --git a/examples/webgpu/stable_diffusion/compile.py b/examples/webgpu/stable_diffusion/compile.py index 7984e63132..d0a5337667 100644 --- a/examples/webgpu/stable_diffusion/compile.py +++ b/examples/webgpu/stable_diffusion/compile.py @@ -98,7 +98,7 @@ if __name__ == "__main__": run, special_names = jit_model(step, *step.input) functions, statements, bufs, _ = compile_net(run, special_names) state = get_state_dict(model) - weights = {id(x.lazydata.realized): name for name, x in state.items()} + weights = {id(x.lazydata.base.realized): name for name, x in state.items()} kernel_code = '\n\n'.join([f"const {key} = `{code.replace(key, 'main')}`;" for key, code in functions.items()]) kernel_names = ', '.join([name for (name, _, _, _) in statements]) kernel_calls = '\n '.join([f"addComputePass(device, commandEncoder, piplines[{i}], [{', '.join(args)}], {global_size});" for i, (_name, args, global_size, _local_size) in enumerate(statements) ]) diff --git a/extra/export_model.py b/extra/export_model.py index fe4bdd1d9e..9c29c1ffb0 100644 --- a/extra/export_model.py +++ b/extra/export_model.py @@ -42,13 +42,13 @@ def jit_model(model, *args) -> Tuple[TinyJit,Dict[int,str]]: # hack to put the inputs back for (j,i),idx in run.input_replace.items(): - realized_input = args[idx].lazydata.realized + realized_input = args[idx].lazydata.base.realized run.jit_cache[j].rawbufs[i] = realized_input special_names[id(realized_input)] = f'input{idx}' # TODO: fetch this from the jit in self.input_replace and self.ret (hint: use get_parameters on self.ret) for i, output in enumerate(the_output): - special_names[id(output.lazydata.realized)] = f'output{i}' + special_names[id(output.lazydata.base.realized)] = f'output{i}' return run, special_names def export_model_clang(functions:Dict[str,str], statements:Dict[str,Tuple[str,int,int]], bufs:Dict[str,Tuple[str,int,int]], bufs_to_save:Dict[str,Tensor], input_names:List[str], output_names:List[str]) -> str: diff --git a/openpilot/compile2.py b/openpilot/compile2.py index d71199664b..d426f172b2 100644 --- a/openpilot/compile2.py +++ b/openpilot/compile2.py @@ -83,7 +83,7 @@ def test_vs_onnx(onnx_data, schedule:Optional[List[ScheduleItem]], inputs:Dict[s return # set inputs - for k,v in inputs.items(): v.lazydata.realized.copyin(new_np_inputs[k].data) + for k,v in inputs.items(): v.lazydata.base.realized.copyin(new_np_inputs[k].data) # run code (all buffers have been allocated) GlobalCounters.reset() diff --git a/test/external/external_multi_gpu.py b/test/external/external_multi_gpu.py index 542f1e6218..4721ac845a 100644 --- a/test/external/external_multi_gpu.py +++ b/test/external/external_multi_gpu.py @@ -21,8 +21,8 @@ if __name__ == "__main__": with Timing("CPU creation: ", on_exit=lambda x: f", {(sz*4*2)/x:.2f} GB/sec"): c0 = (Tensor.ones(sz, device="clang")/2).realize() c1 = (Tensor.ones(sz, device="clang")/4).realize() - print(c0.lazydata.realized) - print(c1.lazydata.realized) + print(c0.lazydata.base.realized) + print(c1.lazydata.base.realized) with Timing("CPU -> 0: ", on_exit=lambda x: f", {(sz*4)/x:.2f} GB/sec"): a0 = c0.to(d0).realize() diff --git a/test/test_dtype.py b/test/test_dtype.py index 231dd704c9..9a9c81b0fc 100644 --- a/test/test_dtype.py +++ b/test/test_dtype.py @@ -30,7 +30,7 @@ def get_available_cast_dtypes(dtype: DType) -> List[DType]: def _test_to_np(a:Tensor, np_dtype, target): if DEBUG >= 2: print(a) na = a.numpy() - if DEBUG >= 2: print(na, na.dtype, a.lazydata.realized) + if DEBUG >= 2: print(na, na.dtype, a.lazydata.base.realized) try: assert na.dtype == np_dtype np.testing.assert_allclose(na, target) diff --git a/test/test_image_dtype.py b/test/test_image_dtype.py index a84cfc059c..7aeb00f98f 100644 --- a/test/test_image_dtype.py +++ b/test/test_image_dtype.py @@ -10,14 +10,14 @@ class TestImageDType(unittest.TestCase): data = Tensor.randn(9*27*4).realize() tst = data.numpy() it = data.cast(dtypes.imagef((9,27,4))).realize() - assert isinstance(it.lazydata.realized.dtype, ImageDType) + assert isinstance(it.lazydata.base.realized.dtype, ImageDType) np.testing.assert_equal(tst, it.numpy()) def test_image_and_back_wrong_shape(self): data = Tensor.randn(9*27*4).realize() tst = data.numpy() it = data.cast(dtypes.imagef((9,12,4))).realize() - assert not isinstance(it.lazydata.realized.dtype, ImageDType) + assert not isinstance(it.lazydata.base.realized.dtype, ImageDType) np.testing.assert_equal(tst, it.numpy()) def test_shrink_load_float(self): @@ -28,7 +28,7 @@ class TestImageDType(unittest.TestCase): def test_mul_stays_image(self): it = Tensor.randn(4).cast(dtypes.imagef((1,1,4))).realize() out = (it*2).realize() - assert isinstance(out.lazydata.realized.dtype, ImageDType) + assert isinstance(out.lazydata.base.realized.dtype, ImageDType) def test_shrink_max(self): it = Tensor.randn(8).cast(dtypes.imagef((1,2,4))).realize() @@ -43,26 +43,26 @@ class TestImageDType(unittest.TestCase): def test_lru_alloc(self): data = Tensor.randn(9*27*4).realize() it = data.cast(dtypes.imagef((9,27,4))).realize() - b1 = it.lazydata.realized._buf + b1 = it.lazydata.base.realized._buf del it it = data.cast(dtypes.imagef((9,27,4))).realize() - assert it.lazydata.realized._buf == b1 + assert it.lazydata.base.realized._buf == b1 def test_no_lru_alloc(self): data = Tensor.randn(9*27*4).realize() it = data.cast(dtypes.imagef((9,27,4))).realize() - b1 = it.lazydata.realized._buf + b1 = it.lazydata.base.realized._buf del it it = data.cast(dtypes.imagef((10,27,4))).realize() - assert it.lazydata.realized._buf != b1 + assert it.lazydata.base.realized._buf != b1 def test_no_lru_alloc_dtype(self): data = Tensor.randn(9*27*4).realize() it = data.cast(dtypes.imagef((9,27,4))).realize() - b1 = it.lazydata.realized._buf + b1 = it.lazydata.base.realized._buf del it it = data.cast(dtypes.imageh((9,27,4))).realize() - assert it.lazydata.realized._buf != b1 + assert it.lazydata.base.realized._buf != b1 class TestImageIdx(unittest.TestCase): def test_to_image_idx_real1(self): diff --git a/test/test_linearizer.py b/test/test_linearizer.py index 0a32464852..7b9209a250 100644 --- a/test/test_linearizer.py +++ b/test/test_linearizer.py @@ -21,7 +21,7 @@ class TestLinearizer(unittest.TestCase): CacheCollector.start() c = ((a.shrink(((0, 2),)) - a.shrink(((2, 4),))) - (b.shrink(((0, 2),)) - b.shrink(((2, 4),)))).realize() rawbufs = CacheCollector.finish()[0].rawbufs - assert len(rawbufs) == 3 and set(rawbufs[1:]) == {a.lazydata.realized, b.lazydata.realized} + assert len(rawbufs) == 3 and set(rawbufs[1:]) == {a.lazydata.base.realized, b.lazydata.base.realized} np_c = (np_a[:2] - np_a[2:]) - (np_b[:2] - np_b[2:]) np.testing.assert_allclose(np_c, c.numpy(), atol=1e-4, rtol=1e-4) diff --git a/test/test_zero_copy.py b/test/test_zero_copy.py index 2d458cdf22..a462073a72 100644 --- a/test/test_zero_copy.py +++ b/test/test_zero_copy.py @@ -7,7 +7,7 @@ def time_tensor_numpy(out:Tensor): times = [] for _ in range(5): st = time.perf_counter() - out.lazydata.realized.toCPU() + out.lazydata.base.realized.toCPU() et = time.perf_counter() - st times.append(et) return min(times)