From 6a5eacba8b8bddcee185f1362897713821813ebf Mon Sep 17 00:00:00 2001 From: chenyu Date: Wed, 2 Apr 2025 03:19:09 -0400 Subject: [PATCH] disable CI red llama 3 4 gpu beam (#9690) device hangs and ci would fail --- .github/workflows/benchmark.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 053b66f4d8..a75090bddd 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -401,8 +401,9 @@ jobs: # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama.py --gen 1 --size 7B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_six_gpu.txt - name: Run LLaMA-3 8B BEAM run: AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 python3 examples/llama3.py --size 8B --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_beam.txt - - name: Run LLaMA-3 8B on 4 GPUs with BEAM - run: AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_four_gpu.txt + # TODO: device hangs + # - name: Run LLaMA-3 8B on 4 GPUs with BEAM + # run: AMD=1 JITBEAM=2 IGNORE_BEAM_CACHE=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_four_gpu.txt # - name: Run LLaMA-3 8B on 6 GPUs # run: AMD=1 CAPTURE_PROCESS_REPLAY=0 python3 examples/llama3.py --size 8B --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_six_gpu.txt - name: Restore amdgpu