diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py
index 56a3f728da..4a8192c80f 100644
--- a/examples/mlperf/model_train.py
+++ b/examples/mlperf/model_train.py
@@ -853,7 +853,7 @@ def train_bert():
         et = time.time()
         eval_times.append(et - st)
 
-        if BENCHMARK and j == BENCHMARK:
+        if BENCHMARK and (j+1) == min(BENCHMARK, max_eval_steps):
           # assume INITMLPERF has BENCHMARK set
           if MLLOGGER and INITMLPERF:
             MLLOGGER.event(key=mllog_constants.INIT_STOP, value=None)
@@ -900,6 +900,9 @@ def train_bert():
         # stop once hitting the target
         break
 
+    # sanity check: a BENCHMARK run must have exited before step i reaches BENCHMARK
+    if BENCHMARK: assert i < BENCHMARK, i
+
     if getenv("CKPT") and i % save_ckpt_freq == 0:
       if MLLOGGER and RUNMLPERF:
         if previous_step:
diff --git a/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_amd/dev_beam.sh b/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_amd/dev_beam.sh
new file mode 100755
index 0000000000..1b6ff92c14
--- /dev/null
+++ b/examples/mlperf/training_submission_v5.0/tinycorp/benchmarks/bert/implementations/tinybox_amd/dev_beam.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Dev launcher for examples/mlperf/model_train.py with MODEL=bert; run from the repo root (paths are relative).
+export PYTHONPATH="."
+export MODEL="bert"
+export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=8 BS=1024 EVAL_BS=1024
+# BEAM_* values are presumably kernel-search limits -- TODO confirm against tinygrad's env var docs
+export BEAM=3 BEAM_UOPS_MAX=4000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
+export IGNORE_JIT_FIRST_BEAM=1
+# export BEAM_LOG_SURPASS_MAX=1
+# export BASEDIR="/raid/datasets/wiki"
+# BENCHMARK is read by train_bert in model_train.py; presumably it ends the run after that many steps -- confirm
+export RESET_STEP=1
+export BENCHMARK=10 DEBUG=2
+
+python3 examples/mlperf/model_train.py