bert eval at the end of training (#9070)

always eval at the last epoch
Author: chenyu
Date: 2025-02-13 16:29:44 -05:00
Committed by: GitHub
Parent: e02e3b94c3
Commit: 9e91898941


@@ -801,7 +801,7 @@ def train_bert():
f"epoch global_mem: {train_steps * GlobalCounters.global_mem:_}")
# ** eval loop **
if i % eval_step_freq == 0 or (BENCHMARK and i == BENCHMARK):
if i % eval_step_freq == 0 or (BENCHMARK and i == BENCHMARK) or i == train_steps:
if MLLOGGER and RUNMLPERF:
MLLOGGER.start(key=mllog_constants.EVAL_START, value=None, metadata={"epoch_num": i*BS, "step_num": i})
if getenv("RESET_STEP", 0): train_step_bert.reset()