From d3a8d5c128c8975f72d0592b80b3e004ec403f6b Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 23 Apr 2025 11:39:56 -0400 Subject: [PATCH] print postprocess_detections time in retinanet eval (#10005) `BS=96 BASEDIR="/raid/datasets/openimages" MODEL=retinanet python examples/mlperf/model_eval.py` ``` ... loaded dataset @ 8.64s loaded initial data @ 12.57s ****** 619.97 ms to enqueue, 46042.13 ms to realize ( 116.22 ms fetching, 45399.58 ms postprocess_detections). 0.09 examples/sec. 0.83 TFLOPS @ 59.23s ****** 147.49 ms to enqueue, 37362.16 ms to realize ( 146.96 ms fetching, 36618.84 ms postprocess_detections). 0.11 examples/sec. 1.03 TFLOPS @ 96.74s ****** 152.85 ms to enqueue, 37244.08 ms to realize ( 120.67 ms fetching, 36235.19 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 134.14s ****** 146.39 ms to enqueue, 37279.85 ms to realize ( 65.07 ms fetching, 36233.56 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 171.56s ****** 152.41 ms to enqueue, 37264.04 ms to realize ( 127.08 ms fetching, 36196.10 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 208.98s ****** 151.29 ms to enqueue, 36868.08 ms to realize ( 142.73 ms fetching, 36153.07 ms postprocess_detections). 0.11 examples/sec. 1.05 TFLOPS @ 246.00s ****** 136.41 ms to enqueue, 37325.04 ms to realize ( 90.29 ms fetching, 36573.38 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 283.46s ``` --- examples/mlperf/model_eval.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/mlperf/model_eval.py b/examples/mlperf/model_eval.py index 60d76496a2..3630ebeb80 100644 --- a/examples/mlperf/model_eval.py +++ b/examples/mlperf/model_eval.py @@ -94,7 +94,7 @@ def eval_retinanet(): mdl.load_from_pretrained() tlog("loaded models") - coco = COCO(download_dataset(base_dir:=getenv("BASE_DIR", BASEDIR), 'validation')) + coco = COCO(download_dataset(base_dir:=getenv("BASEDIR", BASEDIR), 'validation')) coco_eval = COCOeval(coco, iouType="bbox") coco_evalimgs, evaluated_imgs, ncats, narea = [], [], len(coco_eval.params.catIds), len(coco_eval.params.areaRng) tlog("loaded dataset") @@ -116,6 +116,7 @@ def eval_retinanet(): except StopIteration: next_proc = None nd = time.perf_counter() predictions, img_ids = mdl.postprocess_detections(proc[0].numpy(), orig_image_sizes=proc[2]), proc[1] + pd = time.perf_counter() coco_results = [{"image_id": img_ids[i], "category_id": label, "bbox": box.tolist(), "score": score} for i, prediction in enumerate(predictions) for box, score, label in zip(*prediction.values())] with redirect_stdout(None): @@ -126,7 +127,7 @@ def eval_retinanet(): coco_evalimgs.append(np.array(coco_eval.evalImgs).reshape(ncats, narea, len(img_ids))) n += len(proc[0]) et = time.perf_counter() - tlog(f"****** {(run-st)*1000:7.2f} ms to enqueue, {(et-run)*1000:7.2f} ms to realize ({(nd-run)*1000:7.2f} ms fetching). {(len(proc))/(et-st):8.2f} examples/sec. {GlobalCounters.global_ops*1e-12/(et-st):5.2f} TFLOPS") + tlog(f"****** {(run-st)*1000:7.2f} ms to enqueue, {(et-run)*1000:7.2f} ms to realize ({(nd-run)*1000:7.2f} ms fetching, {(pd-run)*1000:4.2f} ms postprocess_detections). {(len(proc))/(et-st):8.2f} examples/sec. {GlobalCounters.global_ops*1e-12/(et-st):5.2f} TFLOPS") st = et proc, next_proc = next_proc, None