DISABLE_COMPILER_CACHE=1 in speed_v_theoretical (#13096)

This commit is contained in:
chenyu
2025-11-04 11:28:18 -05:00
committed by GitHub
parent 1c9f720654
commit 54141e9cb9
2 changed files with 2 additions and 3 deletions

View File

@@ -199,7 +199,7 @@ jobs:
- name: Test speed vs torch
run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test speed vs theoretical
-run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
+run: NV=1 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
- name: Test benchmark allreduce
run: NV=1 python test/external/external_benchmark_multitensor_allreduce.py
- name: Test tensor cores
@@ -409,7 +409,7 @@ jobs:
# python3 -c "import torch; print(torch.__version__)"
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test speed vs theoretical
-run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
+run: AMD=1 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
- name: Test tensor cores
run: |
AMD=1 AMD_LLVM=0 python3 test/opt/test_tensor_cores.py

View File

@@ -85,7 +85,6 @@ class TestKernelSpeed(unittest.TestCase):
gbs = mems / tm / 1e9
self._compare(tm, tflops, gbs, nv_tflops, nv_gbs, amd_tflops, amd_gbs)
# NOTE: tiny7 was slower than tiny12
# TODO: why are convs so slow?!?
def test_conv_3x3_256_32_32_256_256(self): self._test_conv_3x3(256, 32, 32, 256, 256, nv_tflops=27, amd_tflops=14)