increase CI speed with more runners [pr] (#8961)

* increase CI speed with more runners [pr] * splits + cleanups [pr] * more runners * need that dep * split that too * can't be minimal * move test readme * bugfix + naming * one more split * bump to 22.04
2026-01-10 23:48:01 -05:00 · 2025-02-08 09:04:36 +08:00
parent 11d50324d8
commit 5bdd6a1cc4
1 changed files with 192 additions and 111 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -137,10 +137,10 @@ jobs:
        ./autogen_stubs.sh llvm
        diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py

-  uops:
-    name: uops tests
+  tc:
+    name: Tensor Core tests
    runs-on: ubuntu-latest
-    timeout-minutes: 20
+    timeout-minutes: 10
    steps:
    - name: Checkout Code
      uses: actions/checkout@v4
@@ -195,6 +195,19 @@ jobs:
        PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul
+
+  bepython:
+    name: Python Backend
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Setup Environment
+      uses: ./.github/actions/setup-tinygrad
+      with:
+        key: be-minimal
+        deps: testing_minimal
    - name: Test dtype with Python emulator
      run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
    - name: Test ops with Python emulator
@@ -207,9 +220,9 @@ jobs:
      run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1

  linter:
-    name: Linters+fuzz+unit Tests
+    name: Linters
    runs-on: ubuntu-latest
-    timeout-minutes: 20
+    timeout-minutes: 10

    # TODO: run the pre-commit hook to replace a lot of this
    steps:
@@ -218,9 +231,9 @@ jobs:
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
-        key: linting
+        key: linting-only
        python-version: '3.10'
-        deps: linting,testing
+        deps: linting
    - name: Lint bad-indentation and trailing-whitespace with pylint
      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' --recursive=y .
    - name: Lint with ruff
@@ -231,32 +244,58 @@ jobs:
      run: python -m pylint tinygrad/
    - name: Run mypy
      run: python -m mypy --strict-equality --lineprecision-report . && cat lineprecision.txt
+
+  unittest:  # runs the python snippets extracted from README.md, the test/unit/ suite, and the repo line-count check
+    name: Unit Tests
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Setup Environment
+      uses: ./.github/actions/setup-tinygrad  # repo-local action; receives the key/deps inputs below
+      with:
+        key: unittest-12
+        deps: testing
    - name: Test README
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py &&  PYTHONPATH=. python README.py
    - name: Run unit tests
      run: PYTHONPATH="." python -m pytest -n=auto test/unit/
+    - name: Repo line count < 11200 lines  # keep this number in sync with MAX_LINE_COUNT in the command below
+      run: MAX_LINE_COUNT=11200 python sz.py
+
+  fuzzing:  # runs the symbolic and shapetracker fuzz scripts under test/external/
+    name: Fuzzing
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Setup Environment
+      uses: ./.github/actions/setup-tinygrad  # repo-local action; receives the key/deps inputs below
+      with:
+        key: fuzzing-minimal
+        deps: testing_minimal  # NOTE(review): presumably a smaller dependency set than 'testing' - confirm in setup-tinygrad
    - name: Fuzz Test symbolic
      run: python test/external/fuzz_symbolic.py
    - name: Fuzz Test shapetracker
      run: |
        PYTHONPATH="." python test/external/fuzz_shapetracker.py
        PYTHONPATH="." python test/external/fuzz_shapetracker_math.py
-    - name: Repo line count < 11200 lines
-      run: MAX_LINE_COUNT=11200 python sz.py

  testgpuimage:
-    name: 'GPU IMAGE+compile Tests'
+    name: 'GPU IMAGE Tests'
    runs-on: ubuntu-20.04
-    timeout-minutes: 20
+    timeout-minutes: 10
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
      - name: Setup Environment
        uses: ./.github/actions/setup-tinygrad
        with:
-          key: gpuimage
-          deps: testing
-          python-version: '3.11'
+          key: gpu-image
+          deps: testing_minimal
          opencl: 'true'
      - name: Run Kernel Count Test
        run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
@@ -266,6 +305,22 @@ jobs:
        run: |
          PYTHONPATH="." GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20
          PYTHONPATH="." GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist
+      - name: Run process replay tests
+        uses: ./.github/actions/process-replay
+
+  testopenpilot:
+    name: 'openpilot Compile Tests'
+    runs-on: ubuntu-20.04
+    timeout-minutes: 10
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+      - name: Setup Environment
+        uses: ./.github/actions/setup-tinygrad
+        with:
+          key: openpilot-compile
+          deps: testing
+          opencl: 'true'
      - name: Test openpilot model kernel count and gate usage
        run: |
          PYTHONPATH="." ALLOWED_KERNEL_COUNT=209 ALLOWED_READ_IMAGE=2105 ALLOWED_GATED_READ_IMAGE=29 FLOAT16=0 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx
@@ -278,7 +333,7 @@ jobs:

  testopencl:
    name: 'ONNX+Optimization Tests'
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
    timeout-minutes: 20

    steps:
@@ -287,14 +342,17 @@ jobs:
      - name: Setup Environment
        uses: ./.github/actions/setup-tinygrad
        with:
-          key: onnxopt
+          key: onnxoptl
          deps: testing,testing_tf
          python-version: '3.11'
          opencl: 'true'
+          llvm: 'true'
      - name: Test ONNX (GPU)
        run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
      - name: Test ONNX (CLANG)
        run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
+      - name: Test ONNX (LLVM)
+        run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
      - name: Run CLOUD=1 Test
        run: |
          CLOUDDEV=CLANG CLOUD=1 python3 test/test_tiny.py
@@ -314,7 +372,7 @@ jobs:
        uses: ./.github/actions/process-replay

  testdsp:
-    name: DSP Tests
+    name: Linux (DSP)
    runs-on: ubuntu-24.04
    timeout-minutes: 10
    steps:
@@ -337,7 +395,7 @@ jobs:
      run: DEBUG=2 DSP=1 python test/test_tiny.py

  testwebgpu:
-    name: WebGPU Tests
+    name: Linux (WebGPU)
    runs-on: ubuntu-22.04
    timeout-minutes: 20
    steps:
@@ -362,56 +420,6 @@ jobs:
    - name: Run process replay tests
      uses: ./.github/actions/process-replay

-  testmetal:
-    name: Metal Tests
-    runs-on: macos-14
-    timeout-minutes: 20
-
-    steps:
-    - name: Checkout Code
-      uses: actions/checkout@v4
-    - name: Setup Environment
-      uses: ./.github/actions/setup-tinygrad
-      with:
-        key: metal
-        deps: testing
-        python-version: '3.11'
-        webgpu: 'true'
-    - name: Check Device.DEFAULT (METAL) and print some source
-      run: |
-        METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT"
-        METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
-    - name: Run metal test
-      run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --ignore=test/unit --durations=20
-    - name: Run real world test
-      run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
-    - name: Run ONNX
-      run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
-    - name: Test tensor core ops (fake)
-      run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
-    - name: Test tensor core ops (real)
-      run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
-    - name: Test LLaMA compile speed
-      run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
-    - name: Test Beam Search
-      run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
-    - name: Fuzz Test linearizer
-      run: PYTHONPATH="." METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
-    # - name: Fuzz Test models schedule
-    # run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
-    - name: Run TRANSCENDENTAL math
-      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
-    # WebGPU e2e tests
-    - name: Build WEBGPU Efficientnet
-      run: WEBGPU=1 python3 -m examples.compile_efficientnet
-    - name: Clean npm cache
-      run: npm cache clean --force
-    - name: Install Puppeteer
-      run: npm install puppeteer
-    - name: Run WEBGPU Efficientnet
-      run: node test/web/test_webgpu.js
-    - name: Run process replay tests
-      uses: ./.github/actions/process-replay

  tests:
    strategy:
@@ -419,7 +427,7 @@ jobs:
      matrix:
        backend: [llvm, clang, gpu, ptx, amd, nv] #, triton]

-    name: Tests on (${{ matrix.backend }})
+    name: Linux (${{ matrix.backend }})
    runs-on: ubuntu-22.04
    timeout-minutes: 20

@@ -443,9 +451,6 @@ jobs:
      - name: Run pytest (not cuda or amd)
        if: matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
        run: python -m pytest -n=auto test/ --ignore=test/unit --durations=20
-      - name: Run ONNX (only LLVM)
-        if: matrix.backend == 'llvm'
-        run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
      - name: Run pytest (cuda)
        if: matrix.backend=='ptx'||matrix.backend=='triton'||matrix.backend=='nv'
        run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --durations=20
@@ -457,13 +462,110 @@ jobs:
      - name: Run process replay tests
        uses: ./.github/actions/process-replay

-  osxtests:
-    strategy:
-      fail-fast: false
+# ****** OSX Tests ******

-    name: Tests on MacOS
+  testmetal2:  # METAL=1 real-world, ONNX, tensor-core, LLaMA speed, beam-search and linearizer-fuzz checks on macos-14
+    name: MacOS (unit)  # NOTE(review): no step here runs test/unit - confirm the label is intended
+    runs-on: macos-14
+    timeout-minutes: 10
+
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Setup Environment
+      uses: ./.github/actions/setup-tinygrad  # repo-local action; receives the inputs below
+      with:
+        key: metal2
+        deps: testing
+        python-version: '3.11'
+    - name: Run real world test
+      run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
+    - name: Run ONNX
+      run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
+    - name: Test tensor core ops (fake)  # same script as the "real" step below, but with TC=2 and test_gemm
+      run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
+    - name: Test tensor core ops (real)
+      run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
+    - name: Test LLaMA compile speed
+      run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
+    - name: Test Beam Search
+      run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
+    - name: Fuzz Test linearizer
+      run: PYTHONPATH="." METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
+    # - name: Fuzz Test models schedule
+    # run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
+    - name: Run TRANSCENDENTAL math  # NOTE(review): the only test step here without METAL=1 - confirm the default device is intended
+      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
+    - name: Run process replay tests
+      uses: ./.github/actions/process-replay  # repo-local action, invoked after the test steps
+
+  testmetal:  # main pytest run over test/ with METAL=1 on macos-14 (external, models and unit directories excluded)
+    name: MacOS (metal)
+    runs-on: macos-14
+    timeout-minutes: 10
+
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Setup Environment
+      uses: ./.github/actions/setup-tinygrad  # repo-local action; receives the inputs below
+      with:
+        key: metal
+        deps: testing
+        python-version: '3.11'
+    - name: Check Device.DEFAULT (METAL) and print some source  # asserts Device.DEFAULT == 'METAL', then runs test_add at DEBUG=4
+      run: |
+        METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT"
+        METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
+    - name: Run metal test
+      run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --ignore=test/unit --durations=20
+    - name: Run process replay tests
+      uses: ./.github/actions/process-replay  # repo-local action, invoked after the test steps
+
+  osxwebgpu:  # builds the WEBGPU efficientnet example, then drives test/web/test_webgpu.js with node + puppeteer
+    name: MacOS (WebGPU)
+    runs-on: macos-14
+    timeout-minutes: 10
+    steps:
+    - name: Checkout Code
+      uses: actions/checkout@v4
+    - name: Setup Environment
+      uses: ./.github/actions/setup-tinygrad  # NOTE(review): no 'deps' input is passed here, unlike the other jobs - confirm the action's default suffices
+      with:
+        key: osx-webgpu
+        webgpu: 'true'
+    - name: Build WEBGPU Efficientnet
+      run: WEBGPU=1 python3 -m examples.compile_efficientnet
+    - name: Clean npm cache  # runs before the puppeteer install below
+      run: npm cache clean --force
+    - name: Install Puppeteer
+      run: npm install puppeteer
+    - name: Run WEBGPU Efficientnet
+      run: node test/web/test_webgpu.js
+
+  osxclang:  # full pytest run over test/ (minus test/unit) with CLANG=1 on macos-15
+    name: MacOS (clang)
    runs-on: macos-15
-    timeout-minutes: 45
+    timeout-minutes: 10
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+      - name: Setup Environment
+        uses: ./.github/actions/setup-tinygrad  # repo-local action; receives the key/deps inputs below
+        with:
+          key: macos-clang
+          deps: testing
+      - name: Run pytest (clang)  # second command fails the step if test_zero_copy's DEBUG=7 output has a line matching the x18/w18 regex
+        env:
+          CLANG: 1
+        run: |
+          python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
+          ! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
+
+  osxtests:
+    name: MacOS (amd+llvm)
+    runs-on: macos-15
+    timeout-minutes: 10
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
@@ -474,27 +576,6 @@ jobs:
          deps: testing
          amd: 'true'
          llvm: 'true'
-      - name: Check Device.DEFAULT and print some source (AMD)
-        env:
-          PYTHONPATH: ${{ github.workspace }}
-          MOCKGPU: 1
-          AMD: 1
-          FORWARD_ONLY: 1
-        run: |
-          python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
-          DEBUG=5 python3 test/test_ops.py TestOps.test_add
-      - name: Check Device.DEFAULT and print some source (LLVM)
-        env:
-          LLVM: 1
-        run: |
-          python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'LLVM', Device.DEFAULT"
-          DEBUG=5 python3 test/test_ops.py TestOps.test_add
-      - name: Check Device.DEFAULT and print some source (CLANG)
-        env:
-          CLANG: 1
-        run: |
-          python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'CLANG', Device.DEFAULT"
-          DEBUG=5 python3 test/test_ops.py TestOps.test_add
      - name: Run pytest (amd)
        env:
          MOCKGPU: 1
@@ -508,17 +589,18 @@ jobs:
        run: |
          python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
          ! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
-      - name: Run pytest (clang)
-        env:
-          CLANG: 1
-        run: |
-          python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
-          ! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
+
+# ****** Windows Tests ******

  wintests:
-    name: Tests on Windows (llvm+clang)
+    strategy:
+      fail-fast: false
+      matrix:
+        backend: [llvm, clang]
+
+    name: Windows (${{ matrix.backend }})
    runs-on: windows-latest
-    timeout-minutes: 45
+    timeout-minutes: 10
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
@@ -527,9 +609,9 @@ jobs:
        with:
          key: windows-minimal
          deps: testing_minimal
-      - name: Run pytest (llvm)
+      - name: Set env  # appends LLVM=1 or CLANG=1 to GITHUB_ENV so the following steps see it as an environment variable
+        shell: bash
+        run: echo "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' }}" >> "$GITHUB_ENV"
+      - name: Run pytest (${{ matrix.backend }})  # backend is selected by the variable exported in the "Set env" step
        shell: bash
-        run: LLVM=1 python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20
-      - name: Run pytest (clang)
-        shell: bash
-        run: CLANG=1 python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20
+        run: python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20