mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
@@ -2,14 +2,13 @@
|
|||||||
|
|
||||||
export PYTHONPATH="."
|
export PYTHONPATH="."
|
||||||
export MODEL="bert"
|
export MODEL="bert"
|
||||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=24
|
||||||
|
|
||||||
export BEAM=4 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
export BEAM=5 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||||
export IGNORE_JIT_FIRST_BEAM=1
|
export IGNORE_JIT_FIRST_BEAM=1
|
||||||
export BEAM_LOG_SURPASS_MAX=1
|
export BEAM_LOG_SURPASS_MAX=1
|
||||||
export BASEDIR="/raid/datasets/wiki"
|
export BASEDIR="/raid/datasets/wiki"
|
||||||
|
|
||||||
export RESET_STEP=1
|
|
||||||
export BENCHMARK=10 DEBUG=2
|
export BENCHMARK=10 DEBUG=2
|
||||||
|
|
||||||
python3 examples/mlperf/model_train.py
|
python3 examples/mlperf/model_train.py
|
||||||
|
|||||||
@@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
export PYTHONPATH="."
|
export PYTHONPATH="."
|
||||||
export MODEL="bert"
|
export MODEL="bert"
|
||||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=24
|
||||||
|
|
||||||
export BEAM=4 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
export BEAM=5 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||||
export IGNORE_JIT_FIRST_BEAM=1
|
export IGNORE_JIT_FIRST_BEAM=1
|
||||||
export BASEDIR="/raid/datasets/wiki"
|
export BASEDIR="/raid/datasets/wiki"
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
export PYTHONPATH="."
|
export PYTHONPATH="."
|
||||||
export MODEL="bert"
|
export MODEL="bert"
|
||||||
export SUBMISSION_PLATFORM="tinybox_green"
|
export SUBMISSION_PLATFORM="tinybox_green"
|
||||||
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=96
|
export DEFAULT_FLOAT="HALF" SUM_DTYPE="HALF" GPUS=6 BS=96 EVAL_BS=24
|
||||||
|
|
||||||
export BEAM=5 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
export BEAM=5 BEAM_UOPS_MAX=3000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||||
export IGNORE_JIT_FIRST_BEAM=1
|
export IGNORE_JIT_FIRST_BEAM=1
|
||||||
@@ -17,7 +17,7 @@ DATETIME=$(date "+%m%d%H%M")
|
|||||||
LOGFILE="bert_green_${DATETIME}_${SEED}.log"
|
LOGFILE="bert_green_${DATETIME}_${SEED}.log"
|
||||||
|
|
||||||
# init
|
# init
|
||||||
BENCHMARK=10 INITMLPERF=1 RESET_STEP=1 BEAM_LOG_SURPASS_MAX=1 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
BENCHMARK=10 INITMLPERF=1 BEAM_LOG_SURPASS_MAX=1 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
||||||
|
|
||||||
# run
|
# run
|
||||||
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE
|
PARALLEL=0 RUNMLPERF=1 python3 examples/mlperf/model_train.py | tee -a $LOGFILE
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ LOGFILE="bert_red_${DATETIME}_${SEED}.log"
|
|||||||
|
|
||||||
# init
|
# init
|
||||||
sudo rmmod amdgpu || true
|
sudo rmmod amdgpu || true
|
||||||
BENCHMARK=10 INITMLPERF=1 RESET_STEP=1 BEAM_LOG_SURPASS_MAX=1 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
BENCHMARK=10 INITMLPERF=1 BEAM_LOG_SURPASS_MAX=1 python3 examples/mlperf/model_train.py | tee $LOGFILE
|
||||||
|
|
||||||
# run
|
# run
|
||||||
# TODO: AMD driver hangs during init, but is 5% faster per step in real run.
|
# TODO: AMD driver hangs during init, but is 5% faster per step in real run.
|
||||||
|
|||||||
Reference in New Issue
Block a user