continue with dataloader implementation

This commit is contained in:
Francis Lata
2024-10-05 23:51:16 -07:00
parent 8ca848d542
commit 5dbebf460e
3 changed files with 34 additions and 19 deletions

View File

@@ -357,12 +357,14 @@ def batch_load_unet3d(preprocessed_dataset_dir:Path, batch_size:int=6, val:bool=
### RetinaNet
def load_retinanet_data(base_dir:Path, queue_in:Queue, queue_out:Queue, X:Tensor):
from extra.datasets.openimages import image_load, prepare_target
from extra.datasets.openimages import image_load, prepare_target, random_horizontal_flip, resize
while (data:=queue_in.get()) is not None:
idx, img, ann = data
img_id = img["id"]
img, img_size = image_load(base_dir, "train", img["file_name"]) # TODO: resize this with the target!
tgt = prepare_target(ann, img_id, img_size)
img = image_load(base_dir, "train", img["file_name"])
tgt = prepare_target(ann, img_id, img.size[::-1])
img, tgt = random_horizontal_flip(img, tgt)
img, _ = resize(img)
X[idx].contiguous().realize().lazydata.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes()

View File

@@ -81,19 +81,12 @@ def eval_unet3d():
def eval_retinanet():
# RetinaNet with ResNeXt50_32X4D
from extra.datasets.openimages import normalize
from extra.models.resnet import ResNeXt50_32X4D
from extra.models.retinanet import RetinaNet
mdl = RetinaNet(ResNeXt50_32X4D())
mdl.load_from_pretrained()
input_mean = Tensor([0.485, 0.456, 0.406]).reshape(1, -1, 1, 1)
input_std = Tensor([0.229, 0.224, 0.225]).reshape(1, -1, 1, 1)
def input_fixup(x):
x = x.permute([0,3,1,2]) / 255.0
x -= input_mean
x /= input_std
return x
from extra.datasets.openimages import download_dataset, iterate, BASEDIR
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
@@ -103,7 +96,7 @@ def eval_retinanet():
coco_evalimgs, evaluated_imgs, ncats, narea = [], [], len(coco_eval.params.catIds), len(coco_eval.params.areaRng)
from tinygrad.engine.jit import TinyJit
mdlrun = TinyJit(lambda x: mdl(input_fixup(x)).realize())
mdlrun = TinyJit(lambda x: mdl(normalize(x)).realize())
n, bs = 0, 8
st = time.perf_counter()
@@ -114,7 +107,7 @@ def eval_retinanet():
outs = mdlrun(dat).numpy()
else:
mdlrun._jit_cache = []
outs = mdl(input_fixup(dat)).numpy()
outs = mdl(normalize(dat)).numpy()
et = time.perf_counter()
predictions = mdl.postprocess_detections(outs, input_size=dat.shape[1:3], orig_image_sizes=[t["image_size"] for t in targets])
ext = time.perf_counter()

View File

@@ -4,6 +4,7 @@ import numpy as np
from PIL import Image
from pathlib import Path
import boto3, botocore
from tinygrad import Tensor
from tinygrad.helpers import fetch, tqdm, getenv
import pandas as pd
import concurrent.futures
@@ -139,11 +140,7 @@ def fetch_openimages(output_fn:str, base_dir:Path, subset:str):
def image_load(base_dir, subset, fn):
img_folder = base_dir / f"{subset}/data"
img = Image.open(img_folder / fn).convert('RGB')
import torchvision.transforms.functional as F
ret = F.resize(img, size=(800, 800))
ret = np.array(ret)
return ret, img.size[::-1]
return Image.open(img_folder / fn).convert('RGB')
def prepare_target(annotations, img_id, img_size):
boxes = [annot["bbox"] for annot in annotations]
@@ -164,7 +161,7 @@ def iterate(coco, base_dir, bs=8):
X, targets = [], []
for img_id in image_ids[i:i+bs]:
img_dict = coco.loadImgs(img_id)[0]
x, original_size = image_load(base_dir, img_dict['subset'], img_dict["file_name"])
x, original_size = resize(image_load(base_dir, img_dict['subset'], img_dict["file_name"]))
X.append(x)
annotations = coco.loadAnns(coco.getAnnIds(img_id))
targets.append(prepare_target(annotations, img_id, original_size))
@@ -179,6 +176,29 @@ def download_dataset(base_dir:Path, subset:str) -> Path:
return ann_file
def random_horizontal_flip(img, tgt, prob=0.5):
  """Randomly mirror `img` left-right with probability `prob`, flipping the boxes in `tgt` to match.

  Args:
    img: PIL Image.
    tgt: target dict holding a "boxes" array of [x1, y1, x2, y2] rows in pixel coords
         (as produced by prepare_target).
    prob: probability of applying the flip.
  Returns:
    (img, tgt) — the (possibly flipped) image and its matching target.
  """
  import torch
  import torchvision.transforms.functional as F
  if torch.rand(1) < prob:
    # PIL's Image.size is (width, height); mirroring x coords needs only the WIDTH.
    # BUG FIX: the previous `w = img.size[::-1]` produced the (height, width) tuple,
    # which broadcast over boxes[:, [0, 2]] so the new x1 was computed against the
    # image height instead of its width, corrupting boxes on non-square images.
    width = img.size[0]
    img = F.hflip(img)
    tgt["boxes"][:, [0, 2]] = width - tgt["boxes"][:, [2, 0]]
  return img, tgt
def resize(img, size=(800, 800)):
  """Resize a PIL image to `size`.

  Returns a tuple of (resized image as an ndarray, original image size as (h, w)).
  """
  import torchvision.transforms.functional as F
  original_size = img.size[::-1]  # PIL size is (w, h); callers expect (h, w)
  resized = np.array(F.resize(img, size=size))
  return resized, original_size
def normalize(img):
mean = Tensor([0.485, 0.456, 0.406]).reshape(1, -1, 1, 1)
std = Tensor([0.229, 0.224, 0.225]).reshape(1, -1, 1, 1)
img = img.permute([0,3,1,2]) / 255.0
img -= mean
img /= std
return img
if __name__ == "__main__":
download_dataset(base_dir:=getenv("BASE_DIR", BASEDIR), "train")