From bdf442717c8c8b587156e48f7e6df3ecf7b8e579 Mon Sep 17 00:00:00 2001 From: Francis Lata Date: Fri, 28 Feb 2025 14:58:28 +0000 Subject: [PATCH] update seeding on dataloader and the start of training script --- examples/mlperf/dataloader.py | 9 +++++---- examples/mlperf/model_train.py | 4 +--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/examples/mlperf/dataloader.py b/examples/mlperf/dataloader.py index f986c614ab..e7ed43cebf 100644 --- a/examples/mlperf/dataloader.py +++ b/examples/mlperf/dataloader.py @@ -361,13 +361,14 @@ def load_retinanet_data(base_dir:Path, val:bool, queue_in:Queue, queue_out:Queue idx, img, tgt = data img = image_load(base_dir, img["subset"], img["file_name"]) - if seed is not None: - np.random.seed(seed) - torch.manual_seed(seed) - if val: img = resize(img)[0] else: + if seed is not None: + np.random.seed(seed * 2 ** 10 + idx) + random.seed(seed * 2 ** 10 + idx) + torch.manual_seed(seed * 2 ** 10 + idx) + img, tgt = random_horizontal_flip(img, tgt) img, tgt, _ = resize(img, tgt=tgt) match_quality_matrix = box_iou(tgt["boxes"], (anchor := np.concatenate(generate_anchors((800, 800))))) diff --git a/examples/mlperf/model_train.py b/examples/mlperf/model_train.py index 6ce2180bc9..3b9e161b34 100644 --- a/examples/mlperf/model_train.py +++ b/examples/mlperf/model_train.py @@ -414,9 +414,7 @@ def train_retinanet(): config["lr_warmup_epochs"] = lr_warmup_epochs = getenv("LR_WARMUP_EPOCHS", 1) config["lr_warmup_factor"] = lr_warmup_factor = getenv("LR_WARMUP_FACTOR", 1e-3) - if SEED: - Tensor.manual_seed(SEED) - np.random.seed(seed=SEED) + if SEED: Tensor.manual_seed(SEED) # ** model initializers ** resnet.BatchNorm = FrozenBatchNorm2d