remove retinanet INITMLPERF from beam script (#10011)

INITMLPERF only controls logging; whether real data is loaded is controlled solely by RUNMLPERF.
This commit is contained in:
chenyu
2025-04-23 14:32:54 -04:00
committed by GitHub
parent cc52b9c528
commit a3f938dbee
3 changed files with 7 additions and 9 deletions

View File

@@ -361,7 +361,7 @@ def train_retinanet():
NUM_CLASSES = len(MLPERF_CLASSES)
BASEDIR = getenv("BASEDIR", BASEDIR)
BENCHMARK = getenv("BENCHMARK")
INITMLPERF = getenv("INITMLPERF")
# INITMLPERF = getenv("INITMLPERF")
RUNMLPERF = getenv("RUNMLPERF")
config["gpus"] = GPUS = [f"{Device.DEFAULT}:{i}" for i in range(getenv("GPUS", 6))]
@@ -479,7 +479,7 @@ def train_retinanet():
# ** training loop **
BEAM.value = TRAIN_BEAM
if INITMLPERF:
if not RUNMLPERF:
i, proc = 0, _fake_data_get(BS)
else:
train_dataloader = batch_load_retinanet(train_dataset, False, base_dir_path, batch_size=BS, seed=SEED)
@@ -499,7 +499,7 @@ def train_retinanet():
if len(prev_cookies) == getenv("STORE_COOKIES", 1): prev_cookies = [] # free previous cookies after gpu work has been enqueued
try:
if INITMLPERF:
if not RUNMLPERF:
next_proc = _fake_data_get(BS)
else:
next_proc = _data_get(it)
@@ -552,7 +552,7 @@ def train_retinanet():
if getenv("RESET_STEP", 1): _train_step.reset()
with Tensor.train(mode=False), Tensor.test():
if INITMLPERF:
if not RUNMLPERF:
i, proc = 0, _fake_data_get(EVAL_BS, val=(val:=True))
else:
val_dataloader = batch_load_retinanet(val_dataset, (val:=True), Path(BASEDIR), batch_size=EVAL_BS, shuffle=False, seed=SEED)
@@ -583,7 +583,7 @@ def train_retinanet():
if len(prev_cookies) == getenv("STORE_COOKIES", 1): prev_cookies = [] # free previous cookies after gpu work has been enqueued
try:
if INITMLPERF:
if not RUNMLPERF:
next_proc = _fake_data_get(EVAL_BS, val=val)
else:
next_proc = _data_get(it, val=val)

View File

@@ -9,7 +9,6 @@ export BASEDIR="/raid/datasets/openimages"
export TRAIN_BEAM=2 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5 BEAM_PADTO=0
export INITMLPERF=1
export BENCHMARK=10 DEBUG=2
export BENCHMARK=5 DEBUG=2
python examples/mlperf/model_train.py

View File

@@ -9,7 +9,6 @@ export BASEDIR="/raid/datasets/openimages"
export TRAIN_BEAM=2 IGNORE_JIT_FIRST_BEAM=1 BEAM_UOPS_MAX=1500 BEAM_UPCAST_MAX=64 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5 BEAM_PADTO=0
export INITMLPERF=1
export BENCHMARK=10 DEBUG=2
export BENCHMARK=5 DEBUG=2
python examples/mlperf/model_train.py