DISABLE_COMPILER_CACHE=1 in speed_v_theoretical (#13096)

2026-01-09 15:08:02 -05:00 · 2025-11-04 11:28:18 -05:00
parent 1c9f720654
commit 54141e9cb9
2 changed files with 2 additions and 3 deletions
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -199,7 +199,7 @@ jobs:
    - name: Test speed vs torch
      run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
    - name: Test speed vs theoretical
-      run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
+      run: NV=1 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
    - name: Test benchmark allreduce
      run: NV=1 python test/external/external_benchmark_multitensor_allreduce.py
    - name: Test tensor cores
@@ -409,7 +409,7 @@ jobs:
    #    python3 -c "import torch; print(torch.__version__)"
    #    LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
    - name: Test speed vs theoretical
-      run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
+      run: AMD=1 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
    - name: Test tensor cores
      run: |
        AMD=1 AMD_LLVM=0 python3 test/opt/test_tensor_cores.py
--- a/test/external/speed_v_theoretical.py
+++ b/test/external/speed_v_theoretical.py
@@ -85,7 +85,6 @@ class TestKernelSpeed(unittest.TestCase):
    gbs = mems / tm / 1e9
    self._compare(tm, tflops, gbs, nv_tflops, nv_gbs, amd_tflops, amd_gbs)

-  # NOTE: tiny7 was slower than tiny12
  # TODO: why are convs so slow?!?
  def test_conv_3x3_256_32_32_256_256(self): self._test_conv_3x3(256, 32, 32, 256, 256, nv_tflops=27, amd_tflops=14)