7B llama on 4 gpus on benchmark (#3804)

2026-01-09 06:58:11 -05:00 · 2024-03-18 14:32:37 -04:00
parent d79a1d315b
commit 1711274654
1 changed file with 3 additions and 0 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -159,6 +159,8 @@ jobs:
        HSA=1 JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
    - name: Run LLaMA 7B with BEAM
      run: HSA=1 JIT=1 BEAM=2 CACHELEVEL=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_beam.txt
+    - name: Run LLaMA 7B on 4 GPUs
+      run: HSA=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 10 --temperature 0  --timing | tee llama_four_gpu.txt
    - name: Run LLaMA 7B on 6 GPUs
      run: HSA=1 python3 examples/llama.py --gen 1 --size 7B --shard 6 --prompt "Hello." --count 10 --temperature 0  --timing | tee llama_six_gpu.txt
    - name: Run LLaMA-2 70B
@@ -178,6 +180,7 @@ jobs:
          llama_unjitted.txt
          llama_jitted.txt
          llama_beam.txt
+          llama_four_gpu.txt
          llama_six_gpu.txt
          llama_2_70B.txt
          gpt2_unjitted.txt