mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
free_intermediates in bert (#9040)
also re-enable dropout and update EVAL_BS
This commit is contained in:
@@ -804,7 +804,8 @@ def train_bert():
|
||||
if i % eval_step_freq == 0 or (BENCHMARK and i == BENCHMARK):
|
||||
if MLLOGGER and RUNMLPERF:
|
||||
MLLOGGER.start(key=mllog_constants.EVAL_START, value=None, metadata={"epoch_num": i*BS, "step_num": i})
|
||||
if getenv("RESET_STEP", 1): train_step_bert.reset()
|
||||
if getenv("RESET_STEP", 0): train_step_bert.reset()
|
||||
train_step_bert.captured.free_intermediates()
|
||||
eval_lm_losses = []
|
||||
eval_clsf_losses = []
|
||||
eval_lm_accs = []
|
||||
@@ -840,7 +841,8 @@ def train_bert():
|
||||
MLLOGGER.event(key=mllog_constants.INIT_STOP, value=None)
|
||||
return
|
||||
|
||||
if getenv("RESET_STEP", 1): eval_step_bert.reset()
|
||||
if getenv("RESET_STEP", 0): eval_step_bert.reset()
|
||||
eval_step_bert.captured.free_intermediates()
|
||||
del eval_data
|
||||
avg_lm_loss = sum(eval_lm_losses) / len(eval_lm_losses)
|
||||
avg_clsf_loss = sum(eval_clsf_losses) / len(eval_clsf_losses)
|
||||
|
||||
@@ -2,13 +2,11 @@
|
||||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=36
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=66
|
||||
|
||||
export BEAM=4 BEAM_UOPS_MAX=2000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
# TODO: remove DISABLE_DROPOUT=1
|
||||
export DISABLE_DROPOUT=1
|
||||
|
||||
export BENCHMARK=10 DEBUG=2
|
||||
|
||||
|
||||
@@ -2,13 +2,11 @@
|
||||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=36
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=66
|
||||
|
||||
export BEAM=4 BEAM_UOPS_MAX=2000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
# TODO: remove DISABLE_DROPOUT=1
|
||||
export DISABLE_DROPOUT=1
|
||||
|
||||
export WANDB=1 PARALLEL=0
|
||||
|
||||
|
||||
@@ -3,13 +3,11 @@
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export SUBMISSION_PLATFORM="tinybox_green"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=36
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=66
|
||||
|
||||
export BEAM=4 BEAM_UOPS_MAX=2000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
# TODO: remove DISABLE_DROPOUT=1
|
||||
export DISABLE_DROPOUT=1
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=1
|
||||
|
||||
@@ -2,13 +2,11 @@
|
||||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=36
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=66
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
# TODO: remove DISABLE_DROPOUT=1
|
||||
export DISABLE_DROPOUT=1
|
||||
|
||||
export BENCHMARK=10 DEBUG=2
|
||||
|
||||
|
||||
@@ -2,13 +2,11 @@
|
||||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=36
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=66
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
# TODO: remove DISABLE_DROPOUT=1
|
||||
export DISABLE_DROPOUT=1
|
||||
|
||||
export WANDB=1 PARALLEL=0
|
||||
|
||||
|
||||
@@ -3,13 +3,11 @@
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export SUBMISSION_PLATFORM="tinybox_red"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=36
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=66
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
# TODO: remove DISABLE_DROPOUT=1
|
||||
export DISABLE_DROPOUT=1
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=1
|
||||
|
||||
Reference in New Issue
Block a user