bert eval at the end of training (#9070)

always eval at the last epoch
Author: chenyu
Date: 2025-02-13 16:29:44 -05:00
Committed by: GitHub
Parent: e02e3b94c3
Commit: 9e91898941


@@ -801,7 +801,7 @@ def train_bert():
f"epoch global_mem: {train_steps * GlobalCounters.global_mem:_}")
# ** eval loop **
if i % eval_step_freq == 0 or (BENCHMARK and i == BENCHMARK):
if i % eval_step_freq == 0 or (BENCHMARK and i == BENCHMARK) or i == train_steps:
if MLLOGGER and RUNMLPERF:
MLLOGGER.start(key=mllog_constants.EVAL_START, value=None, metadata={"epoch_num": i*BS, "step_num": i})
if getenv("RESET_STEP", 0): train_step_bert.reset()