enable llama 2 70B in tinybox green CI (#5905)

runnable with MAX_CONTEXT=256
commit adba5efc64 (parent 4a65010de8)
Author: chenyu
Committer: GitHub
Date: 2024-08-04 18:48:46 -04:00

@@ -188,8 +188,8 @@ jobs:
       run: NV=1 python3 examples/llama3.py --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_four_gpu.txt
     - name: Run LLaMA-3 8B on 6 GPUs
       run: NV=1 python3 examples/llama3.py --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_six_gpu.txt
-    # - name: Run LLaMA-2 70B
-    #   run: CUDA=1 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_2_70B.txt
+    - name: Run LLaMA-2 70B
+      run: NV=1 MAX_CONTEXT=256 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_2_70B.txt
     - name: Run Mixtral 8x7B
       run: time NV=1 python3 examples/mixtral.py --temperature 0 --count 10 --timing | tee mixtral.txt
     - name: Run GPT2
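
For context on the change: MAX_CONTEXT=256 presumably caps the context length (and with it the attention KV-cache allocation) so the 70B model fits across the six GPUs, and the switch from CUDA=1 to NV=1 matches the other tinybox green steps in this job. A sketch of the re-enabled step written with an Actions env: block, which is equivalent to the inline form above (the surrounding job layout is assumed, not copied from the workflow):

    # sketch only: env: block form of the step added in this diff;
    # placement within the job is assumed
    - name: Run LLaMA-2 70B
      env:
        NV: 1             # tinygrad NV backend, like the other benchmark steps
        MAX_CONTEXT: 256  # cap context so the 70B KV cache fits in GPU memory
      run: python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_2_70B.txt
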
@@ -220,7 +220,7 @@ jobs:
           llama3_beam.txt
           llama3_four_gpu.txt
           llama3_six_gpu.txt
-          # llama_2_70B.txt
+          llama_2_70B.txt
           mixtral.txt
           gpt2_unjitted.txt
           gpt2_jitted.txt
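
The second hunk un-comments llama_2_70B.txt in the job's list of benchmark logs so the new run's output is kept alongside the others. A hedged sketch of the kind of upload step such a path list belongs to (actions/upload-artifact and the artifact name here are assumptions, not copied from the workflow):

    # sketch only: the action, version, and artifact name are assumptions
    - name: Upload benchmark logs
      uses: actions/upload-artifact@v4
      with:
        name: tinybox-green-benchmarks   # hypothetical artifact name
        path: |
          llama3_six_gpu.txt
          llama_2_70B.txt
          mixtral.txt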