llama 7B on 6 gpus benchmark (#3773)

2026-01-09 15:08:02 -05:00 · 2024-03-16 11:38:52 -04:00
parent 07324b56d5
commit 77febb44e6
1 changed file with 5 additions and 2 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -153,12 +153,14 @@ jobs:
      run: HSA=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
    - name: Run Stable Diffusion  # stdout is also written to sd.txt via tee
      run: HSA=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
-    - name: Run LLaMA
+    - name: Run LLaMA 7B  # same command run twice: JIT=0 first, then JIT=1; each run is logged to its own .txt file via tee
      run: |
        HSA=1 JIT=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_unjitted.txt
        HSA=1 JIT=1 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_jitted.txt
-    - name: Run LLaMA with BEAM
+    - name: Run LLaMA 7B with BEAM  # the JIT=1 llama.py command with BEAM=2 CACHELEVEL=0 added; output goes to llama_beam.txt
      run: HSA=1 JIT=1 BEAM=2 CACHELEVEL=0 python3 examples/llama.py --gen 1 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_beam.txt
+    - name: Run LLaMA 7B on 6 GPUs  # adds --size 7B --shard 6 to the llama.py arguments; JIT is not set explicitly here; output goes to llama_six_gpu.txt
+      run: HSA=1 python3 examples/llama.py --gen 1 --size 7B --shard 6 --prompt "Hello." --count 10 --temperature 0  --timing | tee llama_six_gpu.txt
    - name: Run LLaMA-2 70B  # --gen 2 --size 70B with --shard 6; output goes to llama_2_70B.txt
      run: HSA=1 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0  --timing | tee llama_2_70B.txt
    - name: Run Mixtral 8x7B
@@ -176,6 +178,7 @@ jobs:
          llama_unjitted.txt
          llama_jitted.txt
          llama_beam.txt
+          llama_six_gpu.txt
          llama_2_70B.txt
          gpt2_unjitted.txt
          gpt2_jitted.txt