tinyfs tweaks (#13444)

commit 249553a119 (parent f46bc31156)
Author: wozeparrot
Date: 2025-11-24 18:07:32 -08:00
Committed by: GitHub
6 changed files with 19 additions and 17 deletions

View File

@@ -8,4 +8,4 @@ if __name__ == "__main__":
   parser.add_argument("--dest", type=str, required=True, help="destination path to save the file")
   args = parser.parse_args()
-  Tensor(bytes.fromhex(args.hash), device="CPU").load(args.len).to(f"disk:{args.dest}").realize()
+  Tensor(bytes.fromhex(args.hash), device="CPU").fs_load(args.len).to(f"disk:{args.dest}").realize()
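
The renamed call is otherwise unchanged: the script takes a hex content hash and a byte length, materializes the data with fs_load, and realizes it straight onto a disk: device at the destination path. A sketch unpacking the one-liner, assuming a reachable tinyfs backend (the hash/length values and output path here are placeholders, not part of the diff):

from tinygrad import Tensor

hex_hash, length, dest = "d734f5e3be9f1e9d863bfaa4fc6c1ef2", 175866113, "/tmp/out.bin"  # placeholder values
hash_t = Tensor(bytes.fromhex(hex_hash), device="CPU")  # the 16-byte content hash as a CPU tensor
data_t = hash_t.fs_load(length)                         # lazy tensor of `length` bytes addressed by that hash
data_t.to(f"disk:{dest}").realize()                     # realize directly into a file via the disk device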

View File

@@ -1,4 +1,4 @@
-import json, multiprocessing
+import json, multiprocessing, functools
 from pathlib import Path
 from tinygrad.tensor import Tensor
@@ -14,23 +14,25 @@ def fetch_file(item):
   path.parent.mkdir(parents=True, exist_ok=True)
   try:
-    pt = Tensor(bytes.fromhex(h), device="CPU").load(size).to(f"disk:{path.as_posix()}").realize()
+    pt = Tensor(bytes.fromhex(h), device="CPU").fs_load(size).to(f"disk:{path.as_posix()}").realize()
   except Exception as e:
     print(f"error fetching {path}, {h}, {size}: {e}")
     raise
   pt.uop.buffer.deallocate()
-def fetch_mapping():
-  mapping_tensor = Tensor(bytes.fromhex("d734f5e3be9f1e9d863bfaa4fc6c1ef2")).load(175866113).realize()
+def fetch_mapping(h, l):
+  mapping_tensor = Tensor(bytes.fromhex(h)).fs_load(l).realize()
   mapping = mapping_tensor.data().tobytes().decode()
   mapping = json.loads(mapping)
   mapped_files = mapping.items()
   return list(mapped_files)
 if __name__ == "__main__":
+  h, l = getenv("HASH", "d734f5e3be9f1e9d863bfaa4fc6c1ef2"), getenv("LENGTH", 175866113)
   with multiprocessing.Pool(processes=1) as pool:
-    mapped_files = pool.apply(fetch_mapping)
+    mapped_files = pool.apply(functools.partial(fetch_mapping, h, l))
   print(f"fetched mapping for {len(mapped_files)} files")

View File

@@ -8,7 +8,7 @@ raid_root = Path("/raid")
 def upload_file(path: Path):
   pt = Tensor(path).realize()
-  h = pt.store().realize()
+  h = pt.fs_store().realize()
   pt.uop.realized.deallocate()
   return h.data().hex(), path, pt.nbytes()
@@ -26,6 +26,6 @@ if __name__ == "__main__":
   mapping = json.dumps(mapping).encode()
   mapping_tensor = Tensor(mapping, device="CPU")
-  h = mapping_tensor.store().realize()
+  h = mapping_tensor.fs_store().realize()
   print(f"final hash: {h.data().hex()}, size: {len(mapping)}")

View File

@@ -3,19 +3,19 @@ from tinygrad import Tensor
 class TestLoadStore(unittest.TestCase):
   def test_load_shape(self):
-    t = Tensor(bytes(16)).load(1024).kernelize()
+    t = Tensor(bytes(16)).fs_load(1024).kernelize()
     assert t.shape == (1024,), t.shape
   def test_store_shape(self):
-    t = Tensor.zeros(1024).store().kernelize()
+    t = Tensor.zeros(1024).fs_store().kernelize()
     assert t.shape == (16,), t.shape
   def test_load_large_shape(self):
-    t = Tensor(bytes(16)).load(10_000_000).kernelize()
+    t = Tensor(bytes(16)).fs_load(10_000_000).kernelize()
     assert t.shape == (10_000_000,), t.shape
   def test_store_large_shape(self):
-    t = Tensor.zeros(10_000_000).store().kernelize()
+    t = Tensor.zeros(10_000_000).fs_store().kernelize()
     assert t.shape == (16,), t.shape
 if __name__ == "__main__":

View File

@@ -1,4 +1,4 @@
-import socket, json, asyncio, threading
+import socket, json, asyncio, threading, math
 from contextlib import asynccontextmanager
 from tinygrad.device import Compiled, Allocator
 from tinygrad.helpers import DEBUG, getenv
@@ -92,9 +92,9 @@ class TinyFSAllocator(Allocator[TinyFSDevice]):
if dest.device.op == "LOAD":
locs = self.dev.sfile.readline()
dest.copyout_queue = json.loads(locs)
dest.hash_buf[:] = src.tobytes()
dest.hash_buf = src.tobytes()
elif dest.device.op == "STORE":
expected_hashes = dest.size // Tensor.CHUNK_SIZE
expected_hashes = math.ceil(dest.size / Tensor.CHUNK_SIZE)
dest.hash_buf = bytearray(expected_hashes * 16)
self.dev.sfile.readinto(dest.hash_buf)
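
The LOAD branch now rebinds hash_buf to the source bytes instead of copying into a preallocated buffer. The ceil fix in the STORE branch matters whenever dest.size is not an exact multiple of Tensor.CHUNK_SIZE: floor division left no slot in hash_buf for the final partial chunk, so the buffer of 16-byte per-chunk hashes came up short. A small worked example (the CHUNK_SIZE value comes from the tensor.py hunk below; the size is made up):

import math

CHUNK_SIZE = 2**20                    # 1 MiB, matching Tensor.CHUNK_SIZE below
size = 3_000_000                      # hypothetical store spanning ~2.86 chunks
print(size // CHUNK_SIZE)             # 2 -> old code allocated 2*16 = 32 hash bytes
print(math.ceil(size / CHUNK_SIZE))   # 3 -> 3*16 = 48 bytes, one hash per (possibly partial) chunk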

View File

@@ -423,7 +423,7 @@ class Tensor(OpMixin):
     return self.replace(self.shard(devices, axis))
   CHUNK_SIZE = 2**20
-  def load(self, size:int) -> Tensor:
+  def fs_load(self, size:int) -> Tensor:
     """
     Load a tensor from storage.
@@ -451,7 +451,7 @@ class Tensor(OpMixin):
     return data[:size]
-  def store(self) -> Tensor:
+  def fs_store(self) -> Tensor:
     """
     Store a tensor to storage.
     """