s/lazydata.realized/lazydata.base.realized/g (#2914)

* s/lazydata.realized/lazydata.base.realized/g
* not that
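
The rename matters because a LazyBuffer can be a view of another LazyBuffer; only the base of that chain owns the realized device buffer, so every access has to go through .base. A minimal sketch of the access pattern this commit standardizes, assuming the tinygrad API at this commit (Tensor.lazydata is a LazyBuffer, .base is the buffer itself for a non-view buffer, and .realized lives only on the base):

# minimal sketch, assuming Tensor.lazydata is a LazyBuffer whose .base is
# itself for a non-view buffer and whose realized Buffer lives only on the base
from tinygrad.tensor import Tensor

t = (Tensor([2.0]) + Tensor([3.0])).realize()
# old access:  t.lazydata.realized        (only correct when lazydata is its own base)
# new access:  t.lazydata.base.realized   (always resolves to the realized buffer)
assert t.lazydata.base.realized is not None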
@@ -136,15 +136,15 @@ assert len(lazyop.srcs) == 2
 assert lazyop.srcs[0].op == LoadOps.COPY
 assert lazyop.srcs[0].srcs[0].device == "CPU"
 assert lazyop.srcs[0].srcs[0].realized._buf[0] == 2, "the src of the COPY LazyOP is a LazyBuffer on the CPU holding [2.]"
-assert result.lazydata.realized is None, "the LazyBuffer is not realized yet"
+assert result.lazydata.base.realized is None, "the LazyBuffer is not realized yet"

 # now we realize the LazyBuffer
 result.realize()
-assert result.lazydata.realized is not None, "the LazyBuffer is realized!"
+assert result.lazydata.base.realized is not None, "the LazyBuffer is realized!"
 # this brings us nicely to DeviceBuffer, of which the realized ClangBuffer is a subclass
-#assert 'RawMallocBuffer' in str(type(result.lazydata.realized))
+#assert 'RawMallocBuffer' in str(type(result.lazydata.base.realized))
 # getting ahead of ourselves, but we can copy the DeviceBuffer toCPU
-assert result.lazydata.realized.toCPU()[0] == 5, "when put in numpy with toCPU, it's 5"
+assert result.lazydata.base.realized.toCPU()[0] == 5, "when put in numpy with toCPU, it's 5"

 # %%
 # == Union[Interpreted, Compiled] (in tinygrad/ops.py, code 5/10) ==
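
For intuition on why the walkthrough above now reads .base.realized: a movement op such as reshape produces a view LazyBuffer, and (under the same assumption about this commit's API) the view itself never holds the realized buffer, only its base does. A hedged sketch:

# hypothetical illustration: reshape is assumed to yield a view LazyBuffer
# whose base is the buffer forced into existence by .contiguous().realize()
from tinygrad.tensor import Tensor

a = Tensor.ones(4).contiguous().realize()   # a real, realized base buffer
v = a.reshape(2, 2)                         # a view over the same storage
v.realize()                                 # a pure view needs no new kernel
print(v.lazydata.realized)                  # expected: None on the view itself
print(v.lazydata.base.realized)             # the realized buffer lives on the base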
@@ -98,7 +98,7 @@ if __name__ == "__main__":
   run, special_names = jit_model(step, *step.input)
   functions, statements, bufs, _ = compile_net(run, special_names)
   state = get_state_dict(model)
-  weights = {id(x.lazydata.realized): name for name, x in state.items()}
+  weights = {id(x.lazydata.base.realized): name for name, x in state.items()}
   kernel_code = '\n\n'.join([f"const {key} = `{code.replace(key, 'main')}`;" for key, code in functions.items()])
   kernel_names = ', '.join([name for (name, _, _, _) in statements])
   kernel_calls = '\n '.join([f"addComputePass(device, commandEncoder, piplines[{i}], [{', '.join(args)}], {global_size});" for i, (_name, args, global_size, _local_size) in enumerate(statements) ])
@@ -42,13 +42,13 @@ def jit_model(model, *args) -> Tuple[TinyJit,Dict[int,str]]:

   # hack to put the inputs back
   for (j,i),idx in run.input_replace.items():
-    realized_input = args[idx].lazydata.realized
+    realized_input = args[idx].lazydata.base.realized
     run.jit_cache[j].rawbufs[i] = realized_input
     special_names[id(realized_input)] = f'input{idx}'

   # TODO: fetch this from the jit in self.input_replace and self.ret (hint: use get_parameters on self.ret)
   for i, output in enumerate(the_output):
-    special_names[id(output.lazydata.realized)] = f'output{i}'
+    special_names[id(output.lazydata.base.realized)] = f'output{i}'
   return run, special_names

 def export_model_clang(functions:Dict[str,str], statements:Dict[str,Tuple[str,int,int]], bufs:Dict[str,Tuple[str,int,int]], bufs_to_save:Dict[str,Tensor], input_names:List[str], output_names:List[str]) -> str:
@@ -83,7 +83,7 @@ def test_vs_onnx(onnx_data, schedule:Optional[List[ScheduleItem]], inputs:Dict[s
     return

   # set inputs
-  for k,v in inputs.items(): v.lazydata.realized.copyin(new_np_inputs[k].data)
+  for k,v in inputs.items(): v.lazydata.base.realized.copyin(new_np_inputs[k].data)

   # run code (all buffers have been allocated)
   GlobalCounters.reset()
test/external/external_multi_gpu.py (4 changed lines, vendored)
@@ -21,8 +21,8 @@ if __name__ == "__main__":
   with Timing("CPU creation: ", on_exit=lambda x: f", {(sz*4*2)/x:.2f} GB/sec"):
     c0 = (Tensor.ones(sz, device="clang")/2).realize()
     c1 = (Tensor.ones(sz, device="clang")/4).realize()
-  print(c0.lazydata.realized)
-  print(c1.lazydata.realized)
+  print(c0.lazydata.base.realized)
+  print(c1.lazydata.base.realized)

   with Timing("CPU -> 0: ", on_exit=lambda x: f", {(sz*4)/x:.2f} GB/sec"):
     a0 = c0.to(d0).realize()
@@ -30,7 +30,7 @@ def get_available_cast_dtypes(dtype: DType) -> List[DType]:
 def _test_to_np(a:Tensor, np_dtype, target):
   if DEBUG >= 2: print(a)
   na = a.numpy()
-  if DEBUG >= 2: print(na, na.dtype, a.lazydata.realized)
+  if DEBUG >= 2: print(na, na.dtype, a.lazydata.base.realized)
   try:
     assert na.dtype == np_dtype
     np.testing.assert_allclose(na, target)
@@ -10,14 +10,14 @@ class TestImageDType(unittest.TestCase):
     data = Tensor.randn(9*27*4).realize()
     tst = data.numpy()
     it = data.cast(dtypes.imagef((9,27,4))).realize()
-    assert isinstance(it.lazydata.realized.dtype, ImageDType)
+    assert isinstance(it.lazydata.base.realized.dtype, ImageDType)
     np.testing.assert_equal(tst, it.numpy())

   def test_image_and_back_wrong_shape(self):
     data = Tensor.randn(9*27*4).realize()
     tst = data.numpy()
     it = data.cast(dtypes.imagef((9,12,4))).realize()
-    assert not isinstance(it.lazydata.realized.dtype, ImageDType)
+    assert not isinstance(it.lazydata.base.realized.dtype, ImageDType)
     np.testing.assert_equal(tst, it.numpy())

   def test_shrink_load_float(self):
@@ -28,7 +28,7 @@ class TestImageDType(unittest.TestCase):
   def test_mul_stays_image(self):
     it = Tensor.randn(4).cast(dtypes.imagef((1,1,4))).realize()
     out = (it*2).realize()
-    assert isinstance(out.lazydata.realized.dtype, ImageDType)
+    assert isinstance(out.lazydata.base.realized.dtype, ImageDType)

   def test_shrink_max(self):
     it = Tensor.randn(8).cast(dtypes.imagef((1,2,4))).realize()
@@ -43,26 +43,26 @@ class TestImageDType(unittest.TestCase):
   def test_lru_alloc(self):
     data = Tensor.randn(9*27*4).realize()
     it = data.cast(dtypes.imagef((9,27,4))).realize()
-    b1 = it.lazydata.realized._buf
+    b1 = it.lazydata.base.realized._buf
     del it
     it = data.cast(dtypes.imagef((9,27,4))).realize()
-    assert it.lazydata.realized._buf == b1
+    assert it.lazydata.base.realized._buf == b1

   def test_no_lru_alloc(self):
     data = Tensor.randn(9*27*4).realize()
     it = data.cast(dtypes.imagef((9,27,4))).realize()
-    b1 = it.lazydata.realized._buf
+    b1 = it.lazydata.base.realized._buf
     del it
     it = data.cast(dtypes.imagef((10,27,4))).realize()
-    assert it.lazydata.realized._buf != b1
+    assert it.lazydata.base.realized._buf != b1

   def test_no_lru_alloc_dtype(self):
     data = Tensor.randn(9*27*4).realize()
     it = data.cast(dtypes.imagef((9,27,4))).realize()
-    b1 = it.lazydata.realized._buf
+    b1 = it.lazydata.base.realized._buf
     del it
     it = data.cast(dtypes.imageh((9,27,4))).realize()
-    assert it.lazydata.realized._buf != b1
+    assert it.lazydata.base.realized._buf != b1

 class TestImageIdx(unittest.TestCase):
   def test_to_image_idx_real1(self):
@@ -21,7 +21,7 @@ class TestLinearizer(unittest.TestCase):
     CacheCollector.start()
     c = ((a.shrink(((0, 2),)) - a.shrink(((2, 4),))) - (b.shrink(((0, 2),)) - b.shrink(((2, 4),)))).realize()
     rawbufs = CacheCollector.finish()[0].rawbufs
-    assert len(rawbufs) == 3 and set(rawbufs[1:]) == {a.lazydata.realized, b.lazydata.realized}
+    assert len(rawbufs) == 3 and set(rawbufs[1:]) == {a.lazydata.base.realized, b.lazydata.base.realized}
     np_c = (np_a[:2] - np_a[2:]) - (np_b[:2] - np_b[2:])
     np.testing.assert_allclose(np_c, c.numpy(), atol=1e-4, rtol=1e-4)
@@ -7,7 +7,7 @@ def time_tensor_numpy(out:Tensor):
   times = []
   for _ in range(5):
     st = time.perf_counter()
-    out.lazydata.realized.toCPU()
+    out.lazydata.base.realized.toCPU()
     et = time.perf_counter() - st
     times.append(et)
   return min(times)