diff --git a/examples/mlperf/dataloader.py b/examples/mlperf/dataloader.py index 40828f3ff8..7a69c8186f 100644 --- a/examples/mlperf/dataloader.py +++ b/examples/mlperf/dataloader.py @@ -357,12 +357,14 @@ def batch_load_unet3d(preprocessed_dataset_dir:Path, batch_size:int=6, val:bool= ### RetinaNet def load_retinanet_data(base_dir:Path, queue_in:Queue, queue_out:Queue, X:Tensor): - from extra.datasets.openimages import image_load, prepare_target + from extra.datasets.openimages import image_load, prepare_target, random_horizontal_flip, resize while (data:=queue_in.get()) is not None: idx, img, ann = data img_id = img["id"] - img, img_size = image_load(base_dir, "train", img["file_name"]) # TODO: resize this with the target! - tgt = prepare_target(ann, img_id, img_size) + img = image_load(base_dir, "train", img["file_name"]) + tgt = prepare_target(ann, img_id, img.size[::-1]) + img, tgt = random_horizontal_flip(img, tgt) + img, _ = resize(img) X[idx].contiguous().realize().lazydata.realized.as_buffer(force_zero_copy=True)[:] = img.tobytes() diff --git a/examples/mlperf/model_eval.py b/examples/mlperf/model_eval.py index 899484b4c5..acbd90c64b 100644 --- a/examples/mlperf/model_eval.py +++ b/examples/mlperf/model_eval.py @@ -81,19 +81,12 @@ def eval_unet3d(): def eval_retinanet(): # RetinaNet with ResNeXt50_32X4D + from extra.datasets.openimages import normalize from extra.models.resnet import ResNeXt50_32X4D from extra.models.retinanet import RetinaNet mdl = RetinaNet(ResNeXt50_32X4D()) mdl.load_from_pretrained() - input_mean = Tensor([0.485, 0.456, 0.406]).reshape(1, -1, 1, 1) - input_std = Tensor([0.229, 0.224, 0.225]).reshape(1, -1, 1, 1) - def input_fixup(x): - x = x.permute([0,3,1,2]) / 255.0 - x -= input_mean - x /= input_std - return x - from extra.datasets.openimages import download_dataset, iterate, BASEDIR from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval @@ -103,7 +96,7 @@ def eval_retinanet(): coco_evalimgs, evaluated_imgs, 
ncats, narea = [], [], len(coco_eval.params.catIds), len(coco_eval.params.areaRng) from tinygrad.engine.jit import TinyJit - mdlrun = TinyJit(lambda x: mdl(input_fixup(x)).realize()) + mdlrun = TinyJit(lambda x: mdl(normalize(x)).realize()) n, bs = 0, 8 st = time.perf_counter() @@ -114,7 +107,7 @@ def eval_retinanet(): outs = mdlrun(dat).numpy() else: mdlrun._jit_cache = [] - outs = mdl(input_fixup(dat)).numpy() + outs = mdl(normalize(dat)).numpy() et = time.perf_counter() predictions = mdl.postprocess_detections(outs, input_size=dat.shape[1:3], orig_image_sizes=[t["image_size"] for t in targets]) ext = time.perf_counter() diff --git a/extra/datasets/openimages.py b/extra/datasets/openimages.py index f9d2cbadf6..c86c162992 100644 --- a/extra/datasets/openimages.py +++ b/extra/datasets/openimages.py @@ -4,6 +4,7 @@ import numpy as np from PIL import Image from pathlib import Path import boto3, botocore +from tinygrad import Tensor from tinygrad.helpers import fetch, tqdm, getenv import pandas as pd import concurrent.futures @@ -139,11 +140,7 @@ def fetch_openimages(output_fn:str, base_dir:Path, subset:str): def image_load(base_dir, subset, fn): img_folder = base_dir / f"{subset}/data" - img = Image.open(img_folder / fn).convert('RGB') - import torchvision.transforms.functional as F - ret = F.resize(img, size=(800, 800)) - ret = np.array(ret) - return ret, img.size[::-1] + return Image.open(img_folder / fn).convert('RGB') def prepare_target(annotations, img_id, img_size): boxes = [annot["bbox"] for annot in annotations] @@ -164,7 +161,7 @@ def iterate(coco, base_dir, bs=8): X, targets = [], [] for img_id in image_ids[i:i+bs]: img_dict = coco.loadImgs(img_id)[0] - x, original_size = image_load(base_dir, img_dict['subset'], img_dict["file_name"]) + x, original_size = resize(image_load(base_dir, img_dict['subset'], img_dict["file_name"])) X.append(x) annotations = coco.loadAnns(coco.getAnnIds(img_id)) targets.append(prepare_target(annotations, img_id, original_size)) @@ 
-179,6 +176,29 @@ def download_dataset(base_dir:Path, subset:str) -> Path: return ann_file +def random_horizontal_flip(img, tgt, prob=0.5): + import torch + import torchvision.transforms.functional as F + if torch.rand(1) < prob: + w = img.size[0] + img = F.hflip(img) + tgt["boxes"][:, [0, 2]] = w - tgt["boxes"][:, [2, 0]] + return img, tgt + +def resize(img, size=(800, 800)): + import torchvision.transforms.functional as F + img_size = img.size[::-1] + img = F.resize(img, size=size) + img = np.array(img) + return img, img_size + +def normalize(img): + mean = Tensor([0.485, 0.456, 0.406]).reshape(1, -1, 1, 1) + std = Tensor([0.229, 0.224, 0.225]).reshape(1, -1, 1, 1) + img = img.permute([0,3,1,2]) / 255.0 + img -= mean + img /= std + return img if __name__ == "__main__": download_dataset(base_dir:=getenv("BASE_DIR", BASEDIR), "train")