mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-01-09 15:08:02 -05:00
DISABLE_COMPILER_CACHE=1 in speed_v_theoretical (#13096)
This commit is contained in:
4
.github/workflows/benchmark.yml
vendored
4
.github/workflows/benchmark.yml
vendored
@@ -199,7 +199,7 @@ jobs:
|
||||
- name: Test speed vs torch
|
||||
run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
- name: Test speed vs theoretical
|
||||
-      run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
|
||||
+      run: NV=1 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
|
||||
- name: Test benchmark allreduce
|
||||
run: NV=1 python test/external/external_benchmark_multitensor_allreduce.py
|
||||
- name: Test tensor cores
|
||||
@@ -409,7 +409,7 @@ jobs:
|
||||
# python3 -c "import torch; print(torch.__version__)"
|
||||
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
- name: Test speed vs theoretical
|
||||
-      run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
|
||||
+      run: AMD=1 IGNORE_BEAM_CACHE=1 DISABLE_COMPILER_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
|
||||
- name: Test tensor cores
|
||||
run: |
|
||||
AMD=1 AMD_LLVM=0 python3 test/opt/test_tensor_cores.py
|
||||
|
||||
1
test/external/speed_v_theoretical.py
vendored
1
test/external/speed_v_theoretical.py
vendored
@@ -85,7 +85,6 @@ class TestKernelSpeed(unittest.TestCase):
|
||||
gbs = mems / tm / 1e9
|
||||
self._compare(tm, tflops, gbs, nv_tflops, nv_gbs, amd_tflops, amd_gbs)
|
||||
|
||||
-  # NOTE: tiny7 was slower than tiny12
|
||||
# TODO: why are convs so slow?!?
|
||||
def test_conv_3x3_256_32_32_256_256(self): self._test_conv_3x3(256, 32, 32, 256, 256, nv_tflops=27, amd_tflops=14)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user