mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-07 22:23:55 -05:00
fix bert training config (#12647)
FREE_INTERMEDIATE=0 REWRITE_STACK_LIMIT=500000
This commit is contained in:
@@ -1188,7 +1188,9 @@ def train_bert():
|
||||
if MLLOGGER and RUNMLPERF:
|
||||
MLLOGGER.start(key=mllog_constants.EVAL_START, value=None, metadata={"epoch_num": i*GBS, "step_num": i})
|
||||
if getenv("RESET_STEP"): train_step_bert.reset()
|
||||
elif getenv("FREE_INTERMEDIATE", 1) and train_step_bert.captured is not None: train_step_bert.captured.free_intermediates()
|
||||
elif getenv("FREE_INTERMEDIATE", 0) and train_step_bert.captured is not None:
|
||||
# TODO: FREE_INTERMEDIATE nan'ed after jit step 2
|
||||
train_step_bert.captured.free_intermediates()
|
||||
eval_lm_losses = []
|
||||
eval_clsf_losses = []
|
||||
eval_lm_accs = []
|
||||
@@ -1222,7 +1224,7 @@ def train_bert():
|
||||
return
|
||||
|
||||
if getenv("RESET_STEP"): eval_step_bert.reset()
|
||||
elif getenv("FREE_INTERMEDIATE", 1) and eval_step_bert.captured is not None: eval_step_bert.captured.free_intermediates()
|
||||
elif getenv("FREE_INTERMEDIATE", 0) and eval_step_bert.captured is not None: eval_step_bert.captured.free_intermediates()
|
||||
|
||||
del eval_data
|
||||
avg_lm_loss = sum(eval_lm_losses) / len(eval_lm_losses)
|
||||
|
||||
@@ -6,6 +6,7 @@ export DEFAULT_FLOAT="HALF" GPUS=8 BS=1024 EVAL_BS=1024
|
||||
export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.85437 DECAY=0.1
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
|
||||
|
||||
@@ -9,6 +9,7 @@ export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.8
|
||||
export TRAIN_STEPS=3900
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
|
||||
|
||||
@@ -12,6 +12,7 @@ export OPT_BASE_LEARNING_RATE=0.0011 OPT_LAMB_BETA_1=0.60466 OPT_LAMB_BETA_2=0.8
|
||||
export TRAIN_STEPS=3900
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=3 BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1 FREE_INTERMEDIATE=0
|
||||
|
||||
@@ -5,6 +5,7 @@ export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=90 EVAL_BS=90
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
|
||||
@@ -5,6 +5,7 @@ export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=90 EVAL_BS=90
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
|
||||
@@ -8,6 +8,7 @@ export SUBMISSION_PLATFORM="tinybox_green"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=90 EVAL_BS=90
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=8 BEAM_UOPS_MAX=10000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
|
||||
@@ -5,6 +5,7 @@ export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=90 EVAL_BS=90
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
|
||||
@@ -5,6 +5,7 @@ export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=90 EVAL_BS=90
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
|
||||
@@ -8,6 +8,7 @@ export SUBMISSION_PLATFORM="tinybox_red"
|
||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=90 EVAL_BS=90
|
||||
|
||||
export IGNORE_OOB=1
|
||||
export REWRITE_STACK_LIMIT=500000
|
||||
|
||||
export BEAM=5 BEAM_UOPS_MAX=8000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
export IGNORE_JIT_FIRST_BEAM=1
|
||||
|
||||
Reference in New Issue
Block a user