beam benchmark tests (#7638)

* beam benchmark tests * lower AMD number somehow * less flaky
2026-02-05 04:05:05 -05:00 · 2024-11-11 18:11:18 -05:00
parent bfab03288d
commit 773d5b60bf
2 changed files with 57 additions and 2 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -170,8 +170,10 @@ jobs:
      run: NV=1 PTX=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_ptx.txt
    - name: Run Tensor Core GEMM (NV)
      run: NV=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_nv.txt
-    - name: Run Tensor Core GEMM (NV) with BEAM
-      run: BEAM=4 NV=1 HALF=1 IGNORE_BEAM_CACHE=1 DEBUG=2 python3 extra/gemm/simple_matmul.py
+    # - name: Run Tensor Core GEMM (NV) with BEAM
+    #   run: BEAM=4 NV=1 HALF=1 IGNORE_BEAM_CACHE=1 DEBUG=2 python3 extra/gemm/simple_matmul.py
+    - name: Test speed vs theoretical  # runs the speed_v_theoretical pytest file with NV=1; occupies the slot of the BEAM GEMM step commented out above
+      run: NV=1 IGNORE_BEAM_CACHE=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py  # NOTE(review): uses python while neighbouring steps use python3 - confirm both resolve to the same interpreter on the runner
    - name: Run Stable Diffusion
      run: NV=1 python3 examples/stable_diffusion.py --seed 0 --noshow --timing | tee sd.txt
    - name: Run SDXL
@@ -343,6 +345,8 @@ jobs:
        AMD=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
    - name: Run Tensor Core GEMM (AMD)
      run: AMD=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_amd.txt
+    - name: Test speed vs theoretical  # same pytest file as the NV job's step of this name, selected with AMD=1 instead of NV=1
+      run: AMD=1 IGNORE_BEAM_CACHE=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py  # NOTE(review): uses python while neighbouring steps use python3 - confirm both resolve to the same interpreter on the runner
    # TODO: AMD compiler bug causes this to fail
    #- name: Fuzz Padded Tensor Core GEMM
    #  run: HSA=1 M_START=12 M_STOP=20 M_STEP=1 N_START=12 N_STOP=20 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 DEBUG=2 python3 ./extra/gemm/fuzz_matmul.py