smaller LLAMA_LAYERS in Test llama 3 training (#12516)

The full-depth llama 3 training test is very slow now; cap it at LLAMA_LAYERS=4 until it's fast.
Author: chenyu
Date: 2025-10-08 17:10:51 +08:00
Committed by: GitHub
Parent: e701106a64
Commit: 942022c309

@@ -451,7 +451,8 @@ jobs:
     - name: Test Bert training
       run: NULL=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=24 GPUS=4 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py
     - name: Test llama 3 training
-      run: NULL=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=8 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py
+      # TODO: remove LLAMA_LAYERS once it's fast
+      run: NULL=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=8 LLAMA_LAYERS=4 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py
     - name: Run process replay tests
       uses: ./.github/actions/process-replay
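
For context, a minimal sketch of what a CI knob like LLAMA_LAYERS typically does: read an env var and truncate the model's transformer block list before training, so the smoke test runs a shallow model. TinyLlama and apply_layer_override below are hypothetical names for illustration only; the real wiring lives in examples/mlperf/model_train.py and is not reproduced here (tinygrad code would usually read the var through tinygrad.helpers.getenv rather than os.getenv).

import os

class TinyLlama:
    def __init__(self, n_layers=16):
        # stand-in for the real list of transformer blocks
        self.layers = [f"block{i}" for i in range(n_layers)]

def apply_layer_override(model):
    # LLAMA_LAYERS unset or 0 keeps full depth; LLAMA_LAYERS=4 keeps the first 4 blocks
    n = int(os.getenv("LLAMA_LAYERS", "0"))
    if n: model.layers = model.layers[:n]
    return model

model = apply_layer_override(TinyLlama())
print(len(model.layers))  # 16 by default; 4 when run with LLAMA_LAYERS=4

Truncating layers keeps every kernel shape and the full training loop identical to the real run, which is why it is a reasonable stopgap for a slow CI job, and why the TODO says to drop it once the full-depth test is fast.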