tinyfs tweaks (#13444)

This commit is contained in:
wozeparrot
2025-11-24 18:07:32 -08:00
committed by GitHub
parent f46bc31156
commit 249553a119
6 changed files with 19 additions and 17 deletions

View File

@@ -8,4 +8,4 @@ if __name__ == "__main__":
 parser.add_argument("--dest", type=str, required=True, help="destination path to save the file")
 args = parser.parse_args()
-Tensor(bytes.fromhex(args.hash), device="CPU").load(args.len).to(f"disk:{args.dest}").realize()
+Tensor(bytes.fromhex(args.hash), device="CPU").fs_load(args.len).to(f"disk:{args.dest}").realize()

View File

@@ -1,4 +1,4 @@
-import json, multiprocessing
+import json, multiprocessing, functools
 from pathlib import Path
 from tinygrad.tensor import Tensor
@@ -14,23 +14,25 @@ def fetch_file(item):
   path.parent.mkdir(parents=True, exist_ok=True)
   try:
-    pt = Tensor(bytes.fromhex(h), device="CPU").load(size).to(f"disk:{path.as_posix()}").realize()
+    pt = Tensor(bytes.fromhex(h), device="CPU").fs_load(size).to(f"disk:{path.as_posix()}").realize()
   except Exception as e:
     print(f"error fetching {path}, {h}, {size}: {e}")
     raise
   pt.uop.buffer.deallocate()

-def fetch_mapping():
-  mapping_tensor = Tensor(bytes.fromhex("d734f5e3be9f1e9d863bfaa4fc6c1ef2")).load(175866113).realize()
+def fetch_mapping(h, l):
+  mapping_tensor = Tensor(bytes.fromhex(h)).fs_load(l).realize()
   mapping = mapping_tensor.data().tobytes().decode()
   mapping = json.loads(mapping)
   mapped_files = mapping.items()
   return list(mapped_files)

 if __name__ == "__main__":
+  h, l = getenv("HASH", "d734f5e3be9f1e9d863bfaa4fc6c1ef2"), getenv("LENGTH", 175866113)
   with multiprocessing.Pool(processes=1) as pool:
-    mapped_files = pool.apply(fetch_mapping)
+    mapped_files = pool.apply(functools.partial(fetch_mapping, h, l))
   print(f"fetched mapping for {len(mapped_files)} files")

View File

@@ -8,7 +8,7 @@ raid_root = Path("/raid")
 def upload_file(path: Path):
   pt = Tensor(path).realize()
-  h = pt.store().realize()
+  h = pt.fs_store().realize()
   pt.uop.realized.deallocate()
   return h.data().hex(), path, pt.nbytes()
@@ -26,6 +26,6 @@ if __name__ == "__main__":
   mapping = json.dumps(mapping).encode()
   mapping_tensor = Tensor(mapping, device="CPU")
-  h = mapping_tensor.store().realize()
+  h = mapping_tensor.fs_store().realize()
   print(f"final hash: {h.data().hex()}, size: {len(mapping)}")

View File

@@ -3,19 +3,19 @@ from tinygrad import Tensor
 class TestLoadStore(unittest.TestCase):
   def test_load_shape(self):
-    t = Tensor(bytes(16)).load(1024).kernelize()
+    t = Tensor(bytes(16)).fs_load(1024).kernelize()
     assert t.shape == (1024,), t.shape
   def test_store_shape(self):
-    t = Tensor.zeros(1024).store().kernelize()
+    t = Tensor.zeros(1024).fs_store().kernelize()
     assert t.shape == (16,), t.shape
   def test_load_large_shape(self):
-    t = Tensor(bytes(16)).load(10_000_000).kernelize()
+    t = Tensor(bytes(16)).fs_load(10_000_000).kernelize()
     assert t.shape == (10_000_000,), t.shape
   def test_store_large_shape(self):
-    t = Tensor.zeros(10_000_000).store().kernelize()
+    t = Tensor.zeros(10_000_000).fs_store().kernelize()
     assert t.shape == (16,), t.shape
 if __name__ == "__main__":

View File

@@ -1,4 +1,4 @@
-import socket, json, asyncio, threading
+import socket, json, asyncio, threading, math
 from contextlib import asynccontextmanager
 from tinygrad.device import Compiled, Allocator
 from tinygrad.helpers import DEBUG, getenv
@@ -92,9 +92,9 @@ class TinyFSAllocator(Allocator[TinyFSDevice]):
     if dest.device.op == "LOAD":
       locs = self.dev.sfile.readline()
       dest.copyout_queue = json.loads(locs)
-      dest.hash_buf[:] = src.tobytes()
+      dest.hash_buf = src.tobytes()
     elif dest.device.op == "STORE":
-      expected_hashes = dest.size // Tensor.CHUNK_SIZE
+      expected_hashes = math.ceil(dest.size / Tensor.CHUNK_SIZE)
       dest.hash_buf = bytearray(expected_hashes * 16)
       self.dev.sfile.readinto(dest.hash_buf)

View File

@@ -423,7 +423,7 @@ class Tensor(OpMixin):
     return self.replace(self.shard(devices, axis))
   CHUNK_SIZE = 2**20
-  def load(self, size:int) -> Tensor:
+  def fs_load(self, size:int) -> Tensor:
     """
     Load a tensor from storage.
@@ -451,7 +451,7 @@ class Tensor(OpMixin):
     return data[:size]
-  def store(self) -> Tensor:
+  def fs_store(self) -> Tensor:
     """
     Store a tensor to storage.
     """