# tinygrad/extra/tinyfs/upload_raid.py

from pathlib import Path
import multiprocessing, json

from tinygrad.tensor import Tensor
from tinygrad.helpers import tqdm

# Root directory that is scanned recursively for files to upload; keys in the
# resulting mapping are file paths relative to this directory.
raid_root = Path("/raid")

def upload_file(path: Path):
  """Store a single file and report its hash, path and size.

  A Tensor is built from `path` and realized, then `store()` is called on it
  and the result is realized as well.

  Returns a tuple `(hash_hex, path, size)`: the hex string of the stored
  result's data, the unchanged input path (so results coming back from an
  unordered pool can be matched to their file), and the tensor's `nbytes()`.
  """
  file_tensor = Tensor(path).realize()
  stored = file_tensor.store().realize()

  # Explicitly free the realized buffer once the store has been realized.
  # NOTE(review): presumably this bounds memory use in long-lived pool workers -- confirm.
  file_tensor.uop.realized.deallocate()

  hash_hex = stored.data().hex()
  size = file_tensor.nbytes()
  return hash_hex, path, size

if __name__ == "__main__":
  # Collect every regular file below the raid root, in sorted path order.
  raid_files = sorted(entry for entry in raid_root.rglob("*") if entry.is_file())
  print(f"found {len(raid_files)} files in /raid")

  # Upload the files in parallel with one worker per CPU. Results come back in
  # completion order, so each one carries its own path.
  mapping = {}
  worker_count = multiprocessing.cpu_count()
  with multiprocessing.Pool(processes=worker_count) as pool:
    results = pool.imap_unordered(upload_file, raid_files)
    for file_hash, file_path, file_size in tqdm(results, total=len(raid_files)):
      relative_key = file_path.relative_to(raid_root).as_posix()
      mapping[relative_key] = {"hash": file_hash, "size": file_size}

  # Order the entries by relative path so the serialized mapping does not
  # depend on the order in which the uploads finished.
  ordered_mapping = dict(sorted(mapping.items()))

  # Serialize the mapping to JSON bytes and store those bytes as well.
  manifest = json.dumps(ordered_mapping).encode()
  manifest_tensor = Tensor(manifest, device="CPU")
  manifest_hash = manifest_tensor.store().realize()

  print(f"final hash: {manifest_hash.data().hex()}, size: {len(manifest)}")