diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 3270119b30..3e0d6504f7 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -188,8 +188,8 @@ jobs:
       run: NV=1 python3 examples/llama3.py --shard 4 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_four_gpu.txt
     - name: Run LLaMA-3 8B on 6 GPUs
       run: NV=1 python3 examples/llama3.py --shard 6 --model weights/LLaMA-3/8B-SF-DPO/ --benchmark --temperature 0 | tee llama3_six_gpu.txt
-    # - name: Run LLaMA-2 70B
-    #   run: CUDA=1 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_2_70B.txt
+    - name: Run LLaMA-2 70B
+      run: NV=1 MAX_CONTEXT=256 python3 examples/llama.py --gen 2 --size 70B --shard 6 --prompt "Hello." --count 10 --temperature 0 --timing | tee llama_2_70B.txt
     - name: Run Mixtral 8x7B
       run: time NV=1 python3 examples/mixtral.py --temperature 0 --count 10 --timing | tee mixtral.txt
     - name: Run GPT2
@@ -220,7 +220,7 @@ jobs:
           llama3_beam.txt
           llama3_four_gpu.txt
           llama3_six_gpu.txt
-          # llama_2_70B.txt
+          llama_2_70B.txt
           mixtral.txt
           gpt2_unjitted.txt
           gpt2_jitted.txt