scale the base LR (1e-4) by the ratio of the batch size to 256 (BS / 256)

2026-04-29 03:00:14 -04:00 · 2025-02-07 19:46:54 +00:00
parent e8e0791b00
commit 37aab697b8
1 changed files with 3 additions and 3 deletions
--- a/examples/mlperf/model_train.py
+++ b/examples/mlperf/model_train.py
@@ -410,12 +410,12 @@ def train_retinanet():

  # ** hyperparameters **
  # using https://github.com/mlcommons/logging/blob/96d0acee011ba97702532dcc39e6eeaa99ebef24/mlperf_logging/rcp_checker/training_4.1.0/rcps_ssd.json#L3
-  config["lr"] = lr = 1e-4
-  config["lr_warmup_epochs"] = lr_warmup_epochs = 1
-  config["lr_warmup_factor"] = lr_warmup_factor = 1e-3
  config["seed"] = SEED = getenv("SEED", random.SystemRandom().randint(0, 2**32 - 1))
  config["bs"] = BS = getenv("BS", 128)
  config["epochs"] = EPOCHS = getenv("EPOCHS", 4)
+  config["lr"] = lr = 1e-4 * (BS / 256)
+  config["lr_warmup_epochs"] = lr_warmup_epochs = 1
+  config["lr_warmup_factor"] = lr_warmup_factor = 1e-3

  if SEED:
    Tensor.manual_seed(SEED)