add more gpt2 variants to mac/nvidia benchmarks (#3599)

2026-01-09 15:08:02 -05:00 · 2024-03-03 17:55:30 -05:00
parent 968d109453
commit 8e5d60a322
1 changed file with 8 additions and 1 deletion
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -46,6 +46,10 @@ jobs:
      run: |
        JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
        JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
+    - name: Run GPT2 w HALF
+      run: JIT=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt
+    - name: Run GPT2 w HALF/BEAM
+      run: JIT=1 HALF=1 BEAM=2 CACHELEVEL=0 CAST_BEFORE_VIEW=0 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt
    - name: Run 10 CIFAR training steps
      run: STEPS=10 python3 examples/hlb_cifar10.py | tee train_cifar.txt
    # TODO: this is flaky too
@@ -63,6 +67,8 @@ jobs:
          llama_jitted.txt
          gpt2_unjitted.txt
          gpt2_jitted.txt
+          gpt2_half.txt
+          gpt2_half_beam.txt
          matmul.txt
          matmul_half.txt
          sd.txt
@@ -92,7 +98,7 @@ jobs:
        CUDA=1 JIT=0 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_unjitted.txt
        CUDA=1 JIT=1 python3 examples/gpt2.py --prompt "Hello." --count 10 --temperature 0 --timing | tee gpt2_jitted.txt
    - name: Run GPT2 w HALF
-      run: CUDA=1 JIT=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing
+      run: CUDA=1 JIT=1 HALF=1 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half.txt
    - name: Run GPT2 w HALF/BEAM
      run: CUDA=1 JIT=1 HALF=1 BEAM=2 CACHELEVEL=0 CAST_BEFORE_VIEW=0 python3 examples/gpt2.py --count 10 --temperature 0 --timing | tee gpt2_half_beam.txt
    - uses: actions/upload-artifact@v4
@@ -104,6 +110,7 @@ jobs:
          matmul.txt
          gpt2_unjitted.txt
          gpt2_jitted.txt
+          gpt2_half.txt
          gpt2_half_beam.txt

  testamdbenchmark: