increase CI speed with more runners [pr] (#8961)

* increase CI speed with more runners [pr]

* splits + cleanups [pr]

* more runners

* need that dep

* split that too

* can't be minimal

* move test readme

* bugfix + naming

* one more split

* bump to 22.04
This commit is contained in:
George Hotz
2025-02-08 09:04:36 +08:00
committed by GitHub
parent 11d50324d8
commit 5bdd6a1cc4

View File

@@ -137,10 +137,10 @@ jobs:
./autogen_stubs.sh llvm
diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py
uops:
name: uops tests
tc:
name: Tensor Core tests
runs-on: ubuntu-latest
timeout-minutes: 20
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
@@ -195,6 +195,19 @@ jobs:
PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul
bepython:
name: Python Backend
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: be-minimal
deps: testing_minimal
- name: Test dtype with Python emulator
run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
- name: Test ops with Python emulator
@@ -207,9 +220,9 @@ jobs:
run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1
linter:
name: Linters+fuzz+unit Tests
name: Linters
runs-on: ubuntu-latest
timeout-minutes: 20
timeout-minutes: 10
# TODO: run the pre-commit hook to replace a lot of this
steps:
@@ -218,9 +231,9 @@ jobs:
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: linting
key: linting-only
python-version: '3.10'
deps: linting,testing
deps: linting
- name: Lint bad-indentation and trailing-whitespace with pylint
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
- name: Lint with ruff
@@ -231,32 +244,58 @@ jobs:
run: python -m pylint tinygrad/
- name: Run mypy
run: python -m mypy --strict-equality --lineprecision-report . && cat lineprecision.txt
# Unit test job: runs the python snippets from README.md, the test/unit/ suite,
# and the repository line-count check.
unittest:
name: Unit Tests
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: unittest-12
deps: testing
- name: Test README
# awk copies every line between a ```python fence and the next ``` fence of
# README.md into README.py, which is then executed with the repo on PYTHONPATH.
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
- name: Run unit tests
run: PYTHONPATH="." python -m pytest -n=auto test/unit/
- name: Repo line count < 11200 lines
# The limit is handed to sz.py through MAX_LINE_COUNT; presumably sz.py fails
# when the count exceeds it — confirm in sz.py.
run: MAX_LINE_COUNT=11200 python sz.py
# Fuzzing job: runs the symbolic and shapetracker fuzzers from test/external/
# using the testing_minimal dependency set.
fuzzing:
name: Fuzzing
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: fuzzing-minimal
deps: testing_minimal
- name: Fuzz Test symbolic
# NOTE(review): unlike the shapetracker commands below, this one does not set
# PYTHONPATH — confirm the package is importable without it.
run: python test/external/fuzz_symbolic.py
- name: Fuzz Test shapetracker
run: |
PYTHONPATH="." python test/external/fuzz_shapetracker.py
PYTHONPATH="." python test/external/fuzz_shapetracker_math.py
- name: Repo line count < 11200 lines
run: MAX_LINE_COUNT=11200 python sz.py
testgpuimage:
name: 'GPU IMAGE+compile Tests'
name: 'GPU IMAGE Tests'
runs-on: ubuntu-20.04
timeout-minutes: 20
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: gpuimage
deps: testing
python-version: '3.11'
key: gpu-image
deps: testing_minimal
opencl: 'true'
- name: Run Kernel Count Test
run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
@@ -266,6 +305,22 @@ jobs:
run: |
PYTHONPATH="." GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20
PYTHONPATH="." GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist
- name: Run process replay tests
uses: ./.github/actions/process-replay
testopenpilot:
name: 'openpilot Compile Tests'
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: openpilot-compile
deps: testing
opencl: 'true'
- name: Test openpilot model kernel count and gate usage
run: |
PYTHONPATH="." ALLOWED_KERNEL_COUNT=209 ALLOWED_READ_IMAGE=2105 ALLOWED_GATED_READ_IMAGE=29 FLOAT16=0 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx
@@ -278,7 +333,7 @@ jobs:
testopencl:
name: 'ONNX+Optimization Tests'
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
@@ -287,14 +342,17 @@ jobs:
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: onnxopt
key: onnxoptl
deps: testing,testing_tf
python-version: '3.11'
opencl: 'true'
llvm: 'true'
- name: Test ONNX (GPU)
run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (CLANG)
run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (LLVM)
run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Run CLOUD=1 Test
run: |
CLOUDDEV=CLANG CLOUD=1 python3 test/test_tiny.py
@@ -314,7 +372,7 @@ jobs:
uses: ./.github/actions/process-replay
testdsp:
name: DSP Tests
name: Linux (DSP)
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
@@ -337,7 +395,7 @@ jobs:
run: DEBUG=2 DSP=1 python test/test_tiny.py
testwebgpu:
name: WebGPU Tests
name: Linux (WebGPU)
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
@@ -362,56 +420,6 @@ jobs:
- name: Run process replay tests
uses: ./.github/actions/process-replay
testmetal:
name: Metal Tests
runs-on: macos-14
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: metal
deps: testing
python-version: '3.11'
webgpu: 'true'
- name: Check Device.DEFAULT (METAL) and print some source
run: |
METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT"
METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run metal test
run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --ignore=test/unit --durations=20
- name: Run real world test
run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
- name: Run ONNX
run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test tensor core ops (fake)
run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
- name: Test tensor core ops (real)
run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
- name: Test LLaMA compile speed
run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
- name: Test Beam Search
run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
- name: Fuzz Test linearizer
run: PYTHONPATH="." METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
# - name: Fuzz Test models schedule
# run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
# WebGPU e2e tests
- name: Build WEBGPU Efficientnet
run: WEBGPU=1 python3 -m examples.compile_efficientnet
- name: Clean npm cache
run: npm cache clean --force
- name: Install Puppeteer
run: npm install puppeteer
- name: Run WEBGPU Efficientnet
run: node test/web/test_webgpu.js
- name: Run process replay tests
uses: ./.github/actions/process-replay
tests:
strategy:
@@ -419,7 +427,7 @@ jobs:
matrix:
backend: [llvm, clang, gpu, ptx, amd, nv] #, triton]
name: Tests on (${{ matrix.backend }})
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
@@ -443,9 +451,6 @@ jobs:
- name: Run pytest (not cuda or amd)
if: matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
run: python -m pytest -n=auto test/ --ignore=test/unit --durations=20
- name: Run ONNX (only LLVM)
if: matrix.backend == 'llvm'
run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Run pytest (cuda)
if: matrix.backend=='ptx'||matrix.backend=='triton'||matrix.backend=='nv'
run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --durations=20
@@ -457,13 +462,110 @@ jobs:
- name: Run process replay tests
uses: ./.github/actions/process-replay
osxtests:
strategy:
fail-fast: false
# ****** OSX Tests ******
name: Tests on MacOS
# Second macOS Metal job: real-world model tests, the ONNX backend suite, tensor
# core gemm checks, LLaMA compile speed, beam search, linearizer fuzzing and the
# TRANSCENDENTAL math ops, followed by process replay.
# NOTE(review): the display name is "MacOS (unit)" but no step here runs
# test/unit — confirm the name is intended.
testmetal2:
name: MacOS (unit)
runs-on: macos-14
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: metal2
deps: testing
python-version: '3.11'
- name: Run real world test
run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
- name: Run ONNX
run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test tensor core ops (fake)
run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
- name: Test tensor core ops (real)
run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
- name: Test LLaMA compile speed
run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
- name: Test Beam Search
run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
- name: Fuzz Test linearizer
run: PYTHONPATH="." METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
# - name: Fuzz Test models schedule
# run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
- name: Run TRANSCENDENTAL math
# NOTE(review): unlike the other commands in this job, this one does not set
# METAL=1; presumably it relies on the default device — confirm that is intended.
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
# macOS Metal job: asserts that METAL=1 selects the METAL device, then runs the
# test/ tree (excluding test/external, test/models and test/unit) and process replay.
testmetal:
name: MacOS (metal)
runs-on: macos-14
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: metal
deps: testing
python-version: '3.11'
- name: Check Device.DEFAULT (METAL) and print some source
# The first command fails the step unless Device.DEFAULT is 'METAL'; the second
# runs a single op test with DEBUG=4 (per the step name, to print some source).
run: |
METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT"
METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run metal test
run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --ignore=test/unit --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
# macOS WebGPU end-to-end job: compiles the Efficientnet example with WEBGPU=1,
# installs Puppeteer via npm, and runs the node-based test in test/web/.
osxwebgpu:
name: MacOS (WebGPU)
runs-on: macos-14
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
# NOTE(review): no `deps` input is passed here, unlike the other jobs' setup
# steps; presumably the action's default applies — confirm.
with:
key: osx-webgpu
webgpu: 'true'
- name: Build WEBGPU Efficientnet
run: WEBGPU=1 python3 -m examples.compile_efficientnet
- name: Clean npm cache
run: npm cache clean --force
- name: Install Puppeteer
run: npm install puppeteer
- name: Run WEBGPU Efficientnet
run: node test/web/test_webgpu.js
# macOS CLANG job: runs the test/ tree (excluding test/unit) with CLANG=1, then
# checks the DEBUG=7 output of test_zero_copy.py for x18/w18 references.
osxclang:
name: MacOS (clang)
runs-on: macos-15
# NOTE(review): two timeout-minutes values appear below (45 and 10); this looks
# like the old and new sides of the diff — confirm only one remains in the workflow.
timeout-minutes: 45
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: macos-clang
deps: testing
- name: Run pytest (clang)
env:
CLANG: 1
# The second command is negated with `!`: the step fails if grep finds an output
# line starting with 0x that mentions x18 or w18 (not preceded by a 0). The
# `|| true` keeps the test script's own exit status from affecting the result.
# Presumably this guards against generated code using the x18 register — confirm.
run: |
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
osxtests:
name: MacOS (amd+llvm)
runs-on: macos-15
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
@@ -474,27 +576,6 @@ jobs:
deps: testing
amd: 'true'
llvm: 'true'
- name: Check Device.DEFAULT and print some source (AMD)
env:
PYTHONPATH: ${{ github.workspace }}
MOCKGPU: 1
AMD: 1
FORWARD_ONLY: 1
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT"
DEBUG=5 python3 test/test_ops.py TestOps.test_add
- name: Check Device.DEFAULT and print some source (LLVM)
env:
LLVM: 1
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'LLVM', Device.DEFAULT"
DEBUG=5 python3 test/test_ops.py TestOps.test_add
- name: Check Device.DEFAULT and print some source (CLANG)
env:
CLANG: 1
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'CLANG', Device.DEFAULT"
DEBUG=5 python3 test/test_ops.py TestOps.test_add
- name: Run pytest (amd)
env:
MOCKGPU: 1
@@ -508,17 +589,18 @@ jobs:
run: |
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
- name: Run pytest (clang)
env:
CLANG: 1
run: |
python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20
! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$'
# ****** Windows Tests ******
wintests:
name: Tests on Windows (llvm+clang)
strategy:
fail-fast: false
matrix:
backend: [llvm, clang]
name: Windows (${{ matrix.backend }})
runs-on: windows-latest
timeout-minutes: 45
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
@@ -527,9 +609,8 @@ jobs:
with:
key: windows-minimal
deps: testing_minimal
- name: Run pytest (llvm)
# Export the backend selector (LLVM=1 or CLANG=1) to the following steps.
# Each step runs in its own shell process, so merely printing the assignment to
# stdout has no effect on later steps; it must be appended to the file named by
# $GITHUB_ENV. bash is requested explicitly so the POSIX redirection syntax is
# valid on the Windows runner. echo (rather than printf) terminates the entry
# with a newline.
- name: Set env
  shell: bash
  run: echo "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' }}" >> "$GITHUB_ENV"
- name: Run pytest (${{ matrix.backend }})
shell: bash
run: LLVM=1 python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20
- name: Run pytest (clang)
shell: bash
run: CLANG=1 python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20
run: python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20