From 942022c309d8c303165fc0db162022d21d4dbeab Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 8 Oct 2025 17:10:51 +0800 Subject: [PATCH] smaller LLAMA_LAYERS in Test llama 3 training (#12516) very slow now --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2740f62b7e..4138b17888 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -451,7 +451,8 @@ jobs: - name: Test Bert training run: NULL=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=24 GPUS=4 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py - name: Test llama 3 training - run: NULL=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=8 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py + # TODO: remove LLAMA_LAYERS once it's fast + run: NULL=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=8 LLAMA_LAYERS=4 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py - name: Run process replay tests uses: ./.github/actions/process-replay