From bc82f8c5be59b544aea61a09245533335f567d5f Mon Sep 17 00:00:00 2001 From: chenyu Date: Fri, 27 Sep 2024 11:11:43 -0400 Subject: [PATCH 1/2] use where in dropout (#6758) should save memory since we only store mask in bool instead of the upcasted mask used in mul --- tinygrad/tensor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py index c56bb5bddf..771a6cd5f3 100644 --- a/tinygrad/tensor.py +++ b/tinygrad/tensor.py @@ -3090,7 +3090,7 @@ class Tensor: ``` """ if not Tensor.training or p == 0: return self - return self * (Tensor.rand_like(self, requires_grad=False, dtype=dtypes.default_float) >= p) * (1/(1.0 - p)) + return (Tensor.rand_like(self, requires_grad=False, dtype=dtypes.default_float) >= p).where(self, 0) * (1/(1.0 - p)) def one_hot(self, num_classes:int=-1) -> Tensor: """ From d3a387be6309f46b69b8298be6a8d1e5994a4731 Mon Sep 17 00:00:00 2001 From: Francis Lata Date: Fri, 27 Sep 2024 11:13:56 -0400 Subject: [PATCH 2/2] [MLPerf] Prepare openimages dataset script (#6747) * prepare openimages for MLPerf * cleanup * fix issue when clearing jit_cache on retinanet eval * revert pandas specific changes --- examples/mlperf/model_eval.py | 8 +++---- extra/datasets/openimages.py | 42 +++++++++++++++++++---------------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/examples/mlperf/model_eval.py b/examples/mlperf/model_eval.py index 058a765d2e..899484b4c5 100644 --- a/examples/mlperf/model_eval.py +++ b/examples/mlperf/model_eval.py @@ -94,11 +94,11 @@ def eval_retinanet(): x /= input_std return x - from extra.datasets.openimages import openimages, iterate + from extra.datasets.openimages import download_dataset, iterate, BASEDIR from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval from contextlib import redirect_stdout - coco = COCO(openimages('validation')) + coco = COCO(download_dataset(base_dir:=getenv("BASE_DIR", BASEDIR), 'validation')) coco_eval = COCOeval(coco, 
iouType="bbox") coco_evalimgs, evaluated_imgs, ncats, narea = [], [], len(coco_eval.params.catIds), len(coco_eval.params.areaRng) @@ -107,13 +107,13 @@ def eval_retinanet(): n, bs = 0, 8 st = time.perf_counter() - for x, targets in iterate(coco, bs): + for x, targets in iterate(coco, base_dir, bs): dat = Tensor(x.astype(np.float32)) mt = time.perf_counter() if dat.shape[0] == bs: outs = mdlrun(dat).numpy() else: - mdlrun.jit_cache = None + mdlrun._jit_cache = [] outs = mdl(input_fixup(dat)).numpy() et = time.perf_counter() predictions = mdl.postprocess_detections(outs, input_size=dat.shape[1:3], orig_image_sizes=[t["image_size"] for t in targets]) diff --git a/extra/datasets/openimages.py b/extra/datasets/openimages.py index a6449e2954..f9d2cbadf6 100644 --- a/extra/datasets/openimages.py +++ b/extra/datasets/openimages.py @@ -2,14 +2,13 @@ import sys import json import numpy as np from PIL import Image -import pathlib +from pathlib import Path import boto3, botocore -from tinygrad.helpers import fetch -from tqdm import tqdm +from tinygrad.helpers import fetch, tqdm, getenv import pandas as pd import concurrent.futures -BASEDIR = pathlib.Path(__file__).parent / "open-images-v6-mlperf" +BASEDIR = Path(__file__).parent / "open-images-v6-mlperf" BUCKET_NAME = "open-images-dataset" TRAIN_BBOX_ANNOTATIONS_URL = "https://storage.googleapis.com/openimages/v6/oidv6-train-annotations-bbox.csv" VALIDATION_BBOX_ANNOTATIONS_URL = "https://storage.googleapis.com/openimages/v5/validation-annotations-bbox.csv" @@ -55,17 +54,12 @@ MLPERF_CLASSES = ['Airplane', 'Antelope', 'Apple', 'Backpack', 'Balloon', 'Banan ] -def openimages(subset: str): +def openimages(base_dir:Path, subset:str, ann_file:Path): valid_subsets = ['train', 'validation'] if subset not in valid_subsets: raise ValueError(f"{subset=} must be one of {valid_subsets}") - ann_file = BASEDIR / f"{subset}/labels/openimages-mlperf.json" - - if not ann_file.is_file(): - fetch_openimages(ann_file, subset) - - return ann_file 
+ fetch_openimages(ann_file, base_dir, subset) # this slows down the conversion a lot! # maybe use https://raw.githubusercontent.com/scardine/image_size/master/get_image_size.py @@ -112,10 +106,10 @@ def download_image(bucket, subset, image_id, data_dir): except botocore.exceptions.ClientError as exception: sys.exit(f"ERROR when downloading image `validation/{image_id}`: {str(exception)}") -def fetch_openimages(output_fn, subset: str): +def fetch_openimages(output_fn:str, base_dir:Path, subset:str): bucket = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED)).Bucket(BUCKET_NAME) - annotations_dir, data_dir = BASEDIR / "annotations", BASEDIR / f"{subset}/data" + annotations_dir, data_dir = base_dir / "annotations", base_dir / f"{subset}/data" annotations_dir.mkdir(parents=True, exist_ok=True) data_dir.mkdir(parents=True, exist_ok=True) @@ -143,8 +137,8 @@ def fetch_openimages(output_fn, subset: str): print("Converting annotations to COCO format...") export_to_coco(class_map, annotations, image_list, data_dir, output_fn, subset) -def image_load(subset, fn): - img_folder = BASEDIR / f"{subset}/data" +def image_load(base_dir, subset, fn): + img_folder = base_dir / f"{subset}/data" img = Image.open(img_folder / fn).convert('RGB') import torchvision.transforms.functional as F ret = F.resize(img, size=(800, 800)) @@ -164,18 +158,28 @@ def prepare_target(annotations, img_id, img_size): classes = classes[keep] return {"boxes": boxes, "labels": classes, "image_id": img_id, "image_size": img_size} -def iterate(coco, bs=8): +def iterate(coco, base_dir, bs=8): image_ids = sorted(coco.imgs.keys()) for i in range(0, len(image_ids), bs): X, targets = [], [] for img_id in image_ids[i:i+bs]: img_dict = coco.loadImgs(img_id)[0] - x, original_size = image_load(img_dict['subset'], img_dict["file_name"]) + x, original_size = image_load(base_dir, img_dict['subset'], img_dict["file_name"]) X.append(x) annotations = coco.loadAnns(coco.getAnnIds(img_id)) 
targets.append(prepare_target(annotations, img_id, original_size)) yield np.array(X), targets +def download_dataset(base_dir:Path, subset:str) -> Path: + if (ann_file:=base_dir / f"{subset}/labels/openimages-mlperf.json").is_file(): print(f"{subset} dataset is already available") + else: + print(f"Downloading {subset} dataset...") + openimages(base_dir, subset, ann_file) + print("Done") + + return ann_file + + if __name__ == "__main__": - openimages("validation") - openimages("train") + download_dataset(base_dir:=getenv("BASE_DIR", BASEDIR), "train") + download_dataset(base_dir, "validation")