From f53be010d7445c422f894fc25a6e2f2171aa8eda Mon Sep 17 00:00:00 2001 From: chenyu Date: Mon, 17 Mar 2025 10:49:56 -0400 Subject: [PATCH] lower bert learning rate (#9481) slightly better. first sub 3hr run https://wandb.ai/chenyuxyz/MLPerf-BERT/runs/0or96ink/overview --- examples/mlperf/model_train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py index d0f18bf487..aaf4e8f219 100644 --- a/examples/mlperf/model_train.py +++ b/examples/mlperf/model_train.py @@ -658,7 +658,7 @@ def train_bert(): # ** hyperparameters ** BS = config["GLOBAL_BATCH_SIZE"] = getenv("BS", 11 * len(GPUS) if dtypes.default_float in (dtypes.float16, dtypes.bfloat16) else 8 * len(GPUS)) EVAL_BS = config["EVAL_BS"] = getenv("EVAL_BS", 1 * len(GPUS)) - max_lr = config["OPT_BASE_LEARNING_RATE"] = getenv("OPT_BASE_LEARNING_RATE", 0.00018 * math.sqrt(BS/96)) + max_lr = config["OPT_BASE_LEARNING_RATE"] = getenv("OPT_BASE_LEARNING_RATE", 0.000175 * math.sqrt(BS/96)) train_steps = config["TRAIN_STEPS"] = getenv("TRAIN_STEPS", 3300000 // BS) warmup_steps = config["NUM_WARMUP_STEPS"] = getenv("NUM_WARMUP_STEPS", 1)