From 8af8808c61a97f1a7b4fb79b190bd517e43bba8b Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Tue, 19 Aug 2025 21:21:07 -0700 Subject: [PATCH] cleanup tests, bump caches (#11746) --- .github/actions/setup-tinygrad/action.yml | 14 ++++++------- .github/workflows/benchmark.yml | 6 +++--- .github/workflows/test.yml | 10 +++++---- test/{ => device}/test_ocl.py | 0 .../external_benchmark_kernel_launch.py | 2 +- .../external_test_copy_speed.py} | 0 .../external_test_device_speed.py} | 0 .../external_test_speed_v_torch.py | 0 test/test_compile_failures.py | 17 ++++++++++++++- test/test_disassembly.py | 21 ------------------- 10 files changed, 33 insertions(+), 37 deletions(-) rename test/{ => device}/test_ocl.py (100%) rename test/{test_copy_speed.py => speed/external_test_copy_speed.py} (100%) rename test/{test_device_speed.py => speed/external_test_device_speed.py} (100%) rename test/{external => speed}/external_test_speed_v_torch.py (100%) delete mode 100644 test/test_disassembly.py diff --git a/.github/actions/setup-tinygrad/action.yml b/.github/actions/setup-tinygrad/action.yml index e6e95248ef..051acada34 100644 --- a/.github/actions/setup-tinygrad/action.yml +++ b/.github/actions/setup-tinygrad/action.yml @@ -121,7 +121,7 @@ runs: echo 'Acquire::GzipIndexes "true";' | sudo tee /etc/apt/apt.conf.d/gzip echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' | sudo tee -a /etc/apt/apt.conf.d/99keep-debs - + - name: Add OpenCL Repo if: inputs.opencl == 'true' && runner.os == 'Linux' shell: bash @@ -174,7 +174,7 @@ runs: if [[ "${{ inputs.llvm }}" == "true" ]]; then pkgs+=" libllvm20 clang-20 lld-20" fi - + echo "pkgs=$pkgs" >> "$GITHUB_OUTPUT" echo "hash=$(echo -n "$pkgs" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT" @@ -183,21 +183,21 @@ runs: uses: actions/cache@v4 with: path: /var/cache/apt/archives/ - key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }} + key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }}-${{ env.APT_CACHE_VERSION }} - name: Run apt Update + Install if: runner.os == 'Linux' && (inputs.opencl == 'true' || inputs.amd == 'true' || inputs.cuda == 'true' || inputs.webgpu == 'true' || inputs.llvm == 'true') shell: bash run: | sudo apt -qq update || true - + # ******** do install ******** if [[ -n "${{ steps.apt-pkgs.outputs.pkgs }}" ]]; then sudo apt-get -y --allow-unauthenticated --no-install-recommends install ${{ steps.apt-pkgs.outputs.pkgs }} fi - + sudo chown -R $USER:$USER /var/cache/apt/archives/ - + # **** AMD **** - name: Setup AMD (Linux) if: inputs.amd == 'true' && runner.os == 'Linux' @@ -234,7 +234,7 @@ runs: cache-name: cache-gpuocelot-build with: path: ${{ github.workspace }}/gpuocelot/ocelot - key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-0 + key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-${{ env.BUILD_CACHE_VERSION }} - name: Clone/compile gpuocelot if: inputs.ocelot == 'true' && steps.cache-build.outputs.cache-hit != 'true' shell: bash diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 9eb1248a9c..d01bb69314 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -63,7 +63,7 @@ jobs: - name: Run model inference benchmark run: METAL=1 python3.11 test/external/external_model_benchmark.py - name: Test speed vs torch - run: BIG=2 MPS=1 python3.11 test/external/external_test_speed_v_torch.py | tee torch_speed.txt + run: BIG=2 MPS=1 python3.11 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt - name: Test tensor cores run: METAL=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops - name: Test AMX tensor cores @@ -187,7 +187,7 @@ jobs: - name: Run model inference benchmark run: NV=1 CAPTURE_PROCESS_REPLAY=0 NOCLANG=1 python3 test/external/external_model_benchmark.py - name: Test speed vs torch - run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt + run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt - name: Test speed vs theoretical run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20 - name: Test benchmark allreduce @@ -389,7 +389,7 @@ jobs: #- name: Test speed vs torch # run: | # python3 -c "import torch; print(torch.__version__)" - # LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt + # LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt - name: Test speed vs theoretical run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20 - name: Test tensor cores diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 73a951af3e..59e3b0ee1c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,8 +1,10 @@ name: Unit Tests env: # increment this when downloads substantially change to avoid the internet - DOWNLOAD_CACHE_VERSION: '11' - PYTHON_CACHE_VERSION: '2' + DOWNLOAD_CACHE_VERSION: '12' + PYTHON_CACHE_VERSION: '3' + APT_CACHE_VERSION: '1' + BUILD_CACHE_VERSION: '1' CAPTURE_PROCESS_REPLAY: 1 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -30,9 +32,9 @@ jobs: - name: External Benchmark Schedule run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py - name: Speed Test - run: LLVM=1 python3 test/external/external_test_speed_v_torch.py + run: LLVM=1 python3 test/speed/external_test_speed_v_torch.py - name: Speed Test (BEAM=2) - run: BEAM=2 LLVM=1 python3 test/external/external_test_speed_v_torch.py + run: BEAM=2 LLVM=1 python3 test/speed/external_test_speed_v_torch.py docs: name: Docs diff --git a/test/test_ocl.py b/test/device/test_ocl.py similarity index 100% rename from test/test_ocl.py rename to test/device/test_ocl.py diff --git a/test/external/external_benchmark_kernel_launch.py b/test/external/external_benchmark_kernel_launch.py index 7011f7ab28..1369ddc0d5 100644 --- a/test/external/external_benchmark_kernel_launch.py +++ b/test/external/external_benchmark_kernel_launch.py @@ -2,7 +2,7 @@ import time from tinygrad import Tensor, TinyJit, Device, Context from tinygrad.helpers import Profiling, Timing, GlobalCounters -# python3 test/external/external_test_speed_v_torch.py TestSpeed.test_add_a +# python3 test/speed/external_test_speed_v_torch.py TestSpeed.test_add_a @TinyJit def plus(a:Tensor, b:Tensor): return a+b diff --git a/test/test_copy_speed.py b/test/speed/external_test_copy_speed.py similarity index 100% rename from test/test_copy_speed.py rename to test/speed/external_test_copy_speed.py diff --git a/test/test_device_speed.py b/test/speed/external_test_device_speed.py similarity index 100% rename from test/test_device_speed.py rename to test/speed/external_test_device_speed.py diff --git a/test/external/external_test_speed_v_torch.py b/test/speed/external_test_speed_v_torch.py similarity index 100% rename from test/external/external_test_speed_v_torch.py rename to test/speed/external_test_speed_v_torch.py diff --git a/test/test_compile_failures.py b/test/test_compile_failures.py index 5ab87c0b62..16559e907a 100644 --- a/test/test_compile_failures.py +++ b/test/test_compile_failures.py @@ -1,7 +1,10 @@ -import unittest +import unittest, io +from contextlib import redirect_stdout from tinygrad import Tensor, dtypes, Device +from tinygrad.helpers import OSX from tinygrad.engine.realize import lower_schedule from tinygrad.device import is_dtype_supported +from tinygrad.engine.realize import get_program class TestCompileFailures(unittest.TestCase): def compile(self, out:Tensor): @@ -14,5 +17,17 @@ class TestCompileFailures(unittest.TestCase): def test_add_max_uchar(self): self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max()) +class TestDisassembly(unittest.TestCase): + # TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic" + @unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic") + def test_float16_alu(self): + c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16) + s = c.schedule()[-1] + p = get_program(s.ast, Device[Device.DEFAULT].renderer) + lib = Device[Device.DEFAULT].compiler.compile(p.src) + out = io.StringIO() + with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib) + assert "fcvt" not in out.getvalue() + if __name__ == '__main__': unittest.main() diff --git a/test/test_disassembly.py b/test/test_disassembly.py deleted file mode 100644 index e908b83710..0000000000 --- a/test/test_disassembly.py +++ /dev/null @@ -1,21 +0,0 @@ -import unittest, io -from tinygrad import Tensor, dtypes -from contextlib import redirect_stdout -from tinygrad.device import Device -from tinygrad.helpers import OSX -from tinygrad.engine.realize import get_program - -class TestDisassembly(unittest.TestCase): - # TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic" - @unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic") - def test_float16_alu(self): - c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16) - s = c.schedule()[-1] - p = get_program(s.ast, Device[Device.DEFAULT].renderer) - lib = Device[Device.DEFAULT].compiler.compile(p.src) - out = io.StringIO() - with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib) - assert "fcvt" not in out.getvalue() - -if __name__ == "__main__": - unittest.main() \ No newline at end of file