From 7cd7593c5d15e129369646ac79d48cea2f1a2861 Mon Sep 17 00:00:00 2001
From: chenyu <chenyu@fastmail.com>
Date: Wed, 17 Dec 2025 16:54:04 -0500
Subject: [PATCH] add script to train bert on mi350x (#13743)

adapted from mi300 config
---
 .../tinybox_8xMI350X/run_and_time.sh          | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100755 examples/mlperf/training_submission_v6.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI350X/run_and_time.sh

diff --git a/examples/mlperf/training_submission_v6.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI350X/run_and_time.sh b/examples/mlperf/training_submission_v6.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI350X/run_and_time.sh
new file mode 100755
index 0000000000..a2d5cc7df2
--- /dev/null
+++ b/examples/mlperf/training_submission_v6.0/tinycorp/benchmarks/bert/implementations/tinybox_8xMI350X/run_and_time.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+set -e  # Exit on any error
+set -o pipefail  # Make pipeline fail if any command fails
+
+export PYTHONPATH="." AMD=1
+export MODEL="bert"
+export SUBMISSION_PLATFORM="tinybox_8xMI350X"
+export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
+
+# similar to https://github.com/mlcommons/training_results_v3.1/blob/d06288b2bd675a9d88e0e6181f5bb5626b71ec19/Quanta_Cloud_Technology/results/D54U-3U/bert/result_1.txt#L54
+export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
+export TRAIN_STEPS=3900
+
+export IGNORE_OOB=1
+export REWRITE_STACK_LIMIT=5000000
+
+export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
+export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
+export BASEDIR="/raid/datasets/wiki"
+
+# pip install -e ".[mlperf]"
+export LOGMLPERF=1
+
+export SEED=$RANDOM
+DATETIME=$(date "+%m%d%H%M")
+LOGFILE="bert_8xMI350x_${DATETIME}_${SEED}.log"
+
+BENCHMARK=10 INITMLPERF=1 BERT_LAYERS=2 python3 examples/mlperf/model_train.py | tee $LOGFILE
+
+# run
+PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE