print postprocess_detections time in retinanet eval (#10005)

`BS=96 BASEDIR="/raid/datasets/openimages" MODEL=retinanet python examples/mlperf/model_eval.py`

```
...
loaded dataset @ 8.64s
loaded initial data @ 12.57s
****** 619.97 ms to enqueue, 46042.13 ms to realize ( 116.22 ms fetching, 45399.58 ms postprocess_detections). 0.09 examples/sec. 0.83 TFLOPS @ 59.23s
****** 147.49 ms to enqueue, 37362.16 ms to realize ( 146.96 ms fetching, 36618.84 ms postprocess_detections). 0.11 examples/sec. 1.03 TFLOPS @ 96.74s
****** 152.85 ms to enqueue, 37244.08 ms to realize ( 120.67 ms fetching, 36235.19 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 134.14s
****** 146.39 ms to enqueue, 37279.85 ms to realize ( 65.07 ms fetching, 36233.56 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 171.56s
****** 152.41 ms to enqueue, 37264.04 ms to realize ( 127.08 ms fetching, 36196.10 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 208.98s
****** 151.29 ms to enqueue, 36868.08 ms to realize ( 142.73 ms fetching, 36153.07 ms postprocess_detections). 0.11 examples/sec. 1.05 TFLOPS @ 246.00s
****** 136.41 ms to enqueue, 37325.04 ms to realize ( 90.29 ms fetching, 36573.38 ms postprocess_detections). 0.11 examples/sec. 1.04 TFLOPS @ 283.46s
```
2026-01-09 15:08:02 -05:00 · 2025-04-23 11:39:56 -04:00
parent 2ed3acd767
commit d3a8d5c128
1 changed file with 3 additions and 2 deletions
--- a/examples/mlperf/model_eval.py
+++ b/examples/mlperf/model_eval.py
@@ -94,7 +94,7 @@ def eval_retinanet():
  mdl.load_from_pretrained()
  tlog("loaded models")

-  coco = COCO(download_dataset(base_dir:=getenv("BASE_DIR", BASEDIR), 'validation'))
+  coco = COCO(download_dataset(base_dir:=getenv("BASEDIR", BASEDIR), 'validation'))
  coco_eval = COCOeval(coco, iouType="bbox")
  coco_evalimgs, evaluated_imgs, ncats, narea = [], [], len(coco_eval.params.catIds), len(coco_eval.params.areaRng)
  tlog("loaded dataset")
@@ -116,6 +116,7 @@ def eval_retinanet():
    except StopIteration: next_proc = None
    nd = time.perf_counter()
    predictions, img_ids = mdl.postprocess_detections(proc[0].numpy(), orig_image_sizes=proc[2]), proc[1]
+    pd = time.perf_counter()
    coco_results  = [{"image_id": img_ids[i], "category_id": label, "bbox": box.tolist(), "score": score}
      for i, prediction in enumerate(predictions) for box, score, label in zip(*prediction.values())]
    with redirect_stdout(None):
@@ -126,7 +127,7 @@ def eval_retinanet():
    coco_evalimgs.append(np.array(coco_eval.evalImgs).reshape(ncats, narea, len(img_ids)))
    n += len(proc[0])
    et = time.perf_counter()
-    tlog(f"****** {(run-st)*1000:7.2f} ms to enqueue, {(et-run)*1000:7.2f} ms to realize ({(nd-run)*1000:7.2f} ms fetching). {(len(proc))/(et-st):8.2f} examples/sec. {GlobalCounters.global_ops*1e-12/(et-st):5.2f} TFLOPS")
+    tlog(f"****** {(run-st)*1000:7.2f} ms to enqueue, {(et-run)*1000:7.2f} ms to realize ({(nd-run)*1000:7.2f} ms fetching, {(pd-run)*1000:4.2f} ms postprocess_detections). {(len(proc))/(et-st):8.2f} examples/sec. {GlobalCounters.global_ops*1e-12/(et-st):5.2f} TFLOPS")
    st = et
    proc, next_proc = next_proc, None