From a968c4c3a4fbd5ca0171c6fe0f9e86c6b0143bf3 Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Thu, 25 May 2023 11:36:43 -0700
Subject: [PATCH] Cleanup mlperf (#797)

* improve factorization

* cleanups
---
 .gitignore                    |  1 +
 datasets/imagenet.py          | 16 +++++++---------
 datasets/librispeech.py       |  2 +-
 examples/mlperf/model_eval.py | 20 +++++++++++++++-----
 examples/mlperf/model_spec.py | 33 +++++++++++++++++++++++----------
 5 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/.gitignore b/.gitignore
index c07ab3336b..9fc97820a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ disassemblers/cuda_ioctl_sniffer
 *.prof
 datasets/cifar-10-python.tar.gz
 datasets/librispeech/
+datasets/imagenet/
diff --git a/datasets/imagenet.py b/datasets/imagenet.py
index 546dffc80b..e80da024e3 100644
--- a/datasets/imagenet.py
+++ b/datasets/imagenet.py
@@ -1,24 +1,22 @@
 # for imagenet download prepare.sh and run it
-import os, glob, random
+import glob, random
 import json
 import numpy as np
 from PIL import Image
-import functools
-import torchvision.transforms as transforms
+import functools, pathlib
 
-BASEDIR = "/Users/kafka/fun/imagenet"
-ci = json.load(open(os.path.join(BASEDIR, "imagenet_class_index.json")))
+BASEDIR = pathlib.Path(__file__).parent.parent / "datasets/imagenet"
+ci = json.load(open(BASEDIR / "imagenet_class_index.json"))
 cir = {v[0]: int(k) for k,v in ci.items()}
 
 @functools.lru_cache(None)
 def get_train_files():
-  train_files = open(os.path.join(BASEDIR, "train_files")).read().strip().split("\n")
-  return [os.path.join(BASEDIR, "train", x) for x in train_files]
+  train_files = open(BASEDIR / "train_files").read().strip().split("\n")
+  return [(BASEDIR / "train" / x) for x in train_files]
 
 @functools.lru_cache(None)
 def get_val_files():
-  #val_files = open(os.path.join(BASEDIR, "val_files")).read().strip().split("\n")
-  val_files = glob.glob(os.path.join(BASEDIR, "val", "*", "*"))
+  val_files = glob.glob(str(BASEDIR / "val/*/*"))
   return val_files
 
 #rrc = transforms.RandomResizedCrop(224)
diff --git a/datasets/librispeech.py b/datasets/librispeech.py
index b73f02efd0..7b77975c31 100644
--- a/datasets/librispeech.py
+++ b/datasets/librispeech.py
@@ -10,7 +10,7 @@ For mlperf validation the dev-clean dataset is used.
 
 Then all the flacs have to be converted to wav using something like:
 ```fish
-for file in **/*.flac; ffmpeg -i $file -ar 16k "$(dirname $file)/$(basename $file .flac).wav"; end
+for file in $(find * | grep flac); do ffmpeg -i $file -ar 16k "$(dirname $file)/$(basename $file .flac).wav"; done
 ```
 
 Then this [file](https://github.com/mlcommons/inference/blob/master/speech_recognition/rnnt/dev-clean-wav.json) has to also be put in `datasets/librispeech`.
diff --git a/examples/mlperf/model_eval.py b/examples/mlperf/model_eval.py
index 32c196ce81..a9fc3c8ff9 100644
--- a/examples/mlperf/model_eval.py
+++ b/examples/mlperf/model_eval.py
@@ -1,12 +1,9 @@
 import time
 import numpy as np
 from tinygrad.tensor import Tensor
+from tinygrad.helpers import getenv
 
-if __name__ == "__main__":
-  # inference only
-  Tensor.training = False
-  Tensor.no_grad = True
-
+def eval_resnet():
   # Resnet50-v1.5
   from tinygrad.jit import TinyJit
   from models.resnet import ResNet50
@@ -43,6 +40,7 @@ if __name__ == "__main__":
     print(f"****** {n}/{d} {n*100.0/d:.2f}%")
     st = time.perf_counter()
 
+def eval_rnnt():
   # RNN-T
   from models.rnnt import RNNT
   mdl = RNNT()
@@ -70,3 +68,15 @@ if __name__ == "__main__":
     c += len(tt)
     print(f"WER: {scores/words}, {words} words, raw scores: {scores}, c: {c}")
     st = time.perf_counter()
+
+if __name__ == "__main__":
+  # inference only
+  Tensor.training = False
+  Tensor.no_grad = True
+
+  models = getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert").split(",")
+  for m in models:
+    nm = f"eval_{m}"
+    if nm in globals():
+      print(f"eval {m}")
+      globals()[nm]()
diff --git a/examples/mlperf/model_spec.py b/examples/mlperf/model_spec.py
index ddaf014082..3ab91937f6 100644
--- a/examples/mlperf/model_spec.py
+++ b/examples/mlperf/model_spec.py
@@ -1,6 +1,6 @@
 # load each model here, quick benchmark
 from tinygrad.tensor import Tensor
-from tinygrad.helpers import GlobalCounters
+from tinygrad.helpers import GlobalCounters, getenv
 
 def test_model(model, *inputs):
   GlobalCounters.reset()
@@ -8,27 +8,25 @@ def test_model(model, *inputs):
   # TODO: return event future to still get the time_sum_s without DEBUG=2
   print(f"{GlobalCounters.global_ops*1e-9:.2f} GOPS, {GlobalCounters.time_sum_s*1000:.2f} ms")
 
-if __name__ == "__main__":
-  # inference only for now
-  Tensor.training = False
-  Tensor.no_grad = True
-
+def spec_resnet():
   # Resnet50-v1.5
   from models.resnet import ResNet50
   mdl = ResNet50()
   img = Tensor.randn(1, 3, 224, 224)
   test_model(mdl, img)
 
-  # Retinanet
+def spec_retinanet():
+  # TODO: Retinanet
+  pass
 
+def spec_unet3d():
   # 3D UNET
   from models.unet3d import UNet3D
   mdl = UNet3D()
-  #mdl.load_from_pretrained()
   img = Tensor.randn(1, 1, 5, 224, 224)
   test_model(mdl, img)
 
-  # RNNT
+def spec_rnnt():
   from models.rnnt import RNNT
   mdl = RNNT()
   mdl.load_from_pretrained()
@@ -36,4 +34,19 @@ if __name__ == "__main__":
   y = Tensor.randn(1, 220)
   test_model(mdl, x, y)
 
-  # BERT-large
+def spec_bert():
+  # TODO: BERT-large
+  pass
+
+if __name__ == "__main__":
+  # inference only for now
+  Tensor.training = False
+  Tensor.no_grad = True
+
+  for m in getenv("MODEL", "resnet,retinanet,unet3d,rnnt,bert").split(","):
+    nm = f"spec_{m}"
+    if nm in globals():
+      print(f"testing {m}")
+      globals()[nm]()
+
+