mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-29 03:00:14 -04:00
feat: llama8b dev_beam.sh (#14358)
This commit is contained in:
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env bash
#
# Launch examples/mlperf/model_train.py with a fixed llama3 configuration.
#
# Usage: run from the repository root. PYTHONPATH is set to "." and the
# training script is referenced by a relative path, so both resolve against
# the current working directory.
#
# Overridable from the caller's environment (default in parentheses):
#   DEV (AMD), DEBUG (2), FLASH_ATTENTION (1), ALL2ALL (1), LLAMA3_SIZE (8B)
# Every other variable below is set unconditionally and replaces any value
# inherited from the environment.

export PYTHONPATH="."
export DEV="${DEV:-AMD}"
export IGNORE_OOB=1
export REWRITE_STACK_LIMIT=5000000 HCQDEV_WAIT_TIMEOUT_MS=240000

export DEBUG="${DEBUG:-2}"
export FLASH_ATTENTION="${FLASH_ATTENTION:-1}"
export ALL2ALL="${ALL2ALL:-1}"

export DEFAULT_FLOAT="bfloat16" OPTIM_DTYPE="bfloat16"
export DP=8 BS=8 EVAL_BS=8 GRADIENT_ACC_STEPS=1
# Derived: GBS = BS * GRADIENT_ACC_STEPS. This must be a separate statement
# from the export above so that BS and GRADIENT_ACC_STEPS are already set
# when the arithmetic expansion is evaluated.
export GBS=$((BS * GRADIENT_ACC_STEPS))

export MODEL="llama3"
export BASEDIR="/raid/datasets/c4-8b/"
export SMALL=1
export LLAMA3_SIZE="${LLAMA3_SIZE:-8B}"
export EVAL_TARGET=3.3 EVAL_FREQ=12288
export LR="4e-4" END_LR="4e-5" WARMUP_SAMPLES=256 MAX_STEPS=1200000
# Derived from the values above; shell arithmetic is integer-only, so the
# division truncates toward zero.
export WARMUP_STEPS=$((WARMUP_SAMPLES / GBS))
export SAMPLES=$((MAX_STEPS * GBS))

export SEED=5760

# BEAM-related knobs; their exact semantics are defined by the consumer of
# these variables, which is not visible from this script.
export JITBEAM=3
export BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5

# NOTE(review): presumably selects a short benchmark run on synthetic data
# with a reduced layer count -- confirm against model_train.py.
export FAKEDATA=1 BENCHMARK=10 LLAMA_LAYERS=2

python3 examples/mlperf/model_train.py
Reference in New Issue
Block a user