From 5bdd6a1cc4b274bdcf4758049b48a18ed765c2a0 Mon Sep 17 00:00:00 2001 From: George Hotz <72895+geohot@users.noreply.github.com> Date: Sat, 8 Feb 2025 09:04:36 +0800 Subject: [PATCH] increase CI speed with more runners [pr] (#8961) * increase CI speed with more runners [pr] * splits + cleanups [pr] * more runners * need that dep * split that too * can't be minimal * move test readme * bugfix + naming * one more split * bump to 22.04 --- .github/workflows/test.yml | 303 +++++++++++++++++++++++-------------- 1 file changed, 192 insertions(+), 111 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 269eb8e1c7..a7e23b7f18 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -137,10 +137,10 @@ jobs: ./autogen_stubs.sh llvm diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py - uops: - name: uops tests + tc: + name: Tensor Core tests runs-on: ubuntu-latest - timeout-minutes: 20 + timeout-minutes: 10 steps: - name: Checkout Code uses: actions/checkout@v4 @@ -195,6 +195,19 @@ jobs: PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul + + bepython: + name: Python Backend + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: be-minimal + deps: testing_minimal - name: Test dtype with Python emulator run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py - name: Test ops with Python emulator @@ -207,9 +220,9 @@ jobs: run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1 linter: - name: Linters+fuzz+unit Tests + name: Linters runs-on: ubuntu-latest - timeout-minutes: 20 + timeout-minutes: 10 # TODO: run the pre-commit hook to replace a lot of this steps: @@ -218,9 +231,9 @@ jobs: - name: Setup Environment uses: ./.github/actions/setup-tinygrad with: - key: linting + key: linting-only python-version: '3.10' - deps: linting,testing + deps: linting - name: Lint bad-indentation and trailing-whitespace with pylint run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y . - name: Lint with ruff @@ -231,32 +244,58 @@ jobs: run: python -m pylint tinygrad/ - name: Run mypy run: python -m mypy --strict-equality --lineprecision-report . && cat lineprecision.txt + + unittest: + name: Unit Tests + runs-on: ubuntu-latest + timeout-minutes: 10 + + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: unittest-12 + deps: testing - name: Test README run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py - name: Run unit tests run: PYTHONPATH="." 
python -m pytest -n=auto test/unit/ + - name: Repo line count < 11200 lines + run: MAX_LINE_COUNT=11200 python sz.py + + fuzzing: + name: Fuzzing + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: fuzzing-minimal + deps: testing_minimal - name: Fuzz Test symbolic run: python test/external/fuzz_symbolic.py - name: Fuzz Test shapetracker run: | PYTHONPATH="." python test/external/fuzz_shapetracker.py PYTHONPATH="." python test/external/fuzz_shapetracker_math.py - - name: Repo line count < 11200 lines - run: MAX_LINE_COUNT=11200 python sz.py testgpuimage: - name: 'GPU IMAGE+compile Tests' + name: 'GPU IMAGE Tests' runs-on: ubuntu-20.04 - timeout-minutes: 20 + timeout-minutes: 10 steps: - name: Checkout Code uses: actions/checkout@v4 - name: Setup Environment uses: ./.github/actions/setup-tinygrad with: - key: gpuimage - deps: testing - python-version: '3.11' + key: gpu-image + deps: testing_minimal opencl: 'true' - name: Run Kernel Count Test run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py @@ -266,6 +305,22 @@ jobs: run: | PYTHONPATH="." GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20 PYTHONPATH="." GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist + - name: Run process replay tests + uses: ./.github/actions/process-replay + + testopenpilot: + name: 'openpilot Compile Tests' + runs-on: ubuntu-20.04 + timeout-minutes: 10 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: openpilot-compile + deps: testing + opencl: 'true' - name: Test openpilot model kernel count and gate usage run: | PYTHONPATH="." 
ALLOWED_KERNEL_COUNT=209 ALLOWED_READ_IMAGE=2105 ALLOWED_GATED_READ_IMAGE=29 FLOAT16=0 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx @@ -278,7 +333,7 @@ jobs: testopencl: name: 'ONNX+Optimization Tests' - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 timeout-minutes: 20 steps: @@ -287,14 +342,17 @@ jobs: - name: Setup Environment uses: ./.github/actions/setup-tinygrad with: - key: onnxopt + key: onnxoptl deps: testing,testing_tf python-version: '3.11' opencl: 'true' + llvm: 'true' - name: Test ONNX (GPU) run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 - name: Test ONNX (CLANG) run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 + - name: Test ONNX (LLVM) + run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 - name: Run CLOUD=1 Test run: | CLOUDDEV=CLANG CLOUD=1 python3 test/test_tiny.py @@ -314,7 +372,7 @@ jobs: uses: ./.github/actions/process-replay testdsp: - name: DSP Tests + name: Linux (DSP) runs-on: ubuntu-24.04 timeout-minutes: 10 steps: @@ -337,7 +395,7 @@ jobs: run: DEBUG=2 DSP=1 python test/test_tiny.py testwebgpu: - name: WebGPU Tests + name: Linux (WebGPU) runs-on: ubuntu-22.04 timeout-minutes: 20 steps: @@ -362,56 +420,6 @@ jobs: - name: Run process replay tests uses: ./.github/actions/process-replay - testmetal: - name: Metal Tests - runs-on: macos-14 - timeout-minutes: 20 - - steps: - - name: Checkout Code - uses: actions/checkout@v4 - - name: Setup Environment - uses: ./.github/actions/setup-tinygrad - with: - key: metal - deps: testing - python-version: '3.11' - webgpu: 'true' - - name: Check Device.DEFAULT (METAL) and print some source - run: | - METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT" - METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add - - name: Run metal test - run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --ignore=test/unit --durations=20 - - name: Run real world test - run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20 - - name: Run ONNX - run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 - - name: Test tensor core ops (fake) - run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm - - name: Test tensor core ops (real) - run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm - - name: Test LLaMA compile speed - run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py - - name: Test Beam Search - run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py - - name: Fuzz Test linearizer - run: PYTHONPATH="." 
METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py - # - name: Fuzz Test models schedule - # run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py - - name: Run TRANSCENDENTAL math - run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20 - # WebGPU e2e tests - - name: Build WEBGPU Efficientnet - run: WEBGPU=1 python3 -m examples.compile_efficientnet - - name: Clean npm cache - run: npm cache clean --force - - name: Install Puppeteer - run: npm install puppeteer - - name: Run WEBGPU Efficientnet - run: node test/web/test_webgpu.js - - name: Run process replay tests - uses: ./.github/actions/process-replay tests: strategy: @@ -419,7 +427,7 @@ jobs: matrix: backend: [llvm, clang, gpu, ptx, amd, nv] #, triton] - name: Tests on (${{ matrix.backend }}) + name: Linux (${{ matrix.backend }}) runs-on: ubuntu-22.04 timeout-minutes: 20 @@ -443,9 +451,6 @@ jobs: - name: Run pytest (not cuda or amd) if: matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv' run: python -m pytest -n=auto test/ --ignore=test/unit --durations=20 - - name: Run ONNX (only LLVM) - if: matrix.backend == 'llvm' - run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 - name: Run pytest (cuda) if: matrix.backend=='ptx'||matrix.backend=='triton'||matrix.backend=='nv' run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --durations=20 @@ -457,13 +462,110 @@ jobs: - name: Run process replay tests uses: ./.github/actions/process-replay - osxtests: - strategy: - fail-fast: false +# ****** OSX Tests ****** - name: Tests on MacOS + testmetal2: + name: MacOS (unit) + runs-on: macos-14 + timeout-minutes: 10 + + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: metal2 + deps: testing + python-version: '3.11' + - name: Run real world test + run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20 + - name: Run ONNX + run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 + - name: Test tensor core ops (fake) + run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm + - name: Test tensor core ops (real) + run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm + - name: Test LLaMA compile speed + run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py + - name: Test Beam Search + run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py + - name: Fuzz Test linearizer + run: PYTHONPATH="." 
METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py + # - name: Fuzz Test models schedule + # run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py + - name: Run TRANSCENDENTAL math + run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20 + - name: Run process replay tests + uses: ./.github/actions/process-replay + + testmetal: + name: MacOS (metal) + runs-on: macos-14 + timeout-minutes: 10 + + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: metal + deps: testing + python-version: '3.11' + - name: Check Device.DEFAULT (METAL) and print some source + run: | + METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT" + METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add + - name: Run metal test + run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --ignore=test/unit --durations=20 + - name: Run process replay tests + uses: ./.github/actions/process-replay + + osxwebgpu: + name: MacOS (WebGPU) + runs-on: macos-14 + timeout-minutes: 10 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: osx-webgpu + webgpu: 'true' + - name: Build WEBGPU Efficientnet + run: WEBGPU=1 python3 -m examples.compile_efficientnet + - name: Clean npm cache + run: npm cache clean --force + - name: Install Puppeteer + run: npm install puppeteer + - name: Run WEBGPU Efficientnet + run: node test/web/test_webgpu.js + + osxclang: + name: MacOS (clang) runs-on: macos-15 - timeout-minutes: 45 + timeout-minutes: 10 + steps: + - name: Checkout Code + uses: actions/checkout@v4 + - name: Setup Environment + uses: ./.github/actions/setup-tinygrad + with: + key: macos-clang + deps: testing + - name: Run pytest (clang) + env: + CLANG: 1 + run: | + python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20 + ! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$' + + osxtests: + name: MacOS (amd+llvm) + runs-on: macos-15 + timeout-minutes: 10 steps: - name: Checkout Code uses: actions/checkout@v4 @@ -474,27 +576,6 @@ jobs: deps: testing amd: 'true' llvm: 'true' - - name: Check Device.DEFAULT and print some source (AMD) - env: - PYTHONPATH: ${{ github.workspace }} - MOCKGPU: 1 - AMD: 1 - FORWARD_ONLY: 1 - run: | - python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'AMD', Device.DEFAULT" - DEBUG=5 python3 test/test_ops.py TestOps.test_add - - name: Check Device.DEFAULT and print some source (LLVM) - env: - LLVM: 1 - run: | - python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'LLVM', Device.DEFAULT" - DEBUG=5 python3 test/test_ops.py TestOps.test_add - - name: Check Device.DEFAULT and print some source (CLANG) - env: - CLANG: 1 - run: | - python3 -c "from tinygrad import Device; assert Device.DEFAULT == 'CLANG', Device.DEFAULT" - DEBUG=5 python3 test/test_ops.py TestOps.test_add - name: Run pytest (amd) env: MOCKGPU: 1 @@ -508,17 +589,18 @@ jobs: run: | python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20 ! 
(DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$' - - name: Run pytest (clang) - env: - CLANG: 1 - run: | - python3 -m pytest -n=auto test/ --ignore=test/unit --durations=20 - ! (DEBUG=7 python3 test/test_zero_copy.py 2>&1 || true) | grep -E '^0x.*[^0](x18|w18).*$' + +# ****** Windows Tests ****** wintests: - name: Tests on Windows (llvm+clang) + strategy: + fail-fast: false + matrix: + backend: [llvm, clang] + + name: Windows (${{ matrix.backend }}) runs-on: windows-latest - timeout-minutes: 45 + timeout-minutes: 10 steps: - name: Checkout Code uses: actions/checkout@v4 @@ -527,9 +609,8 @@ jobs: with: key: windows-minimal deps: testing_minimal - - name: Run pytest (llvm) + - name: Set env + run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1'}}" + - name: Run pytest (${{ matrix.backend }}) shell: bash - run: LLVM=1 python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20 - - name: Run pytest (clang) - shell: bash - run: CLANG=1 python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20 + run: python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20
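
Note on the per-backend toggle used by the new Windows matrix above: the job fans out over backend: [llvm, clang], and the "Set env" step turns the matrix value into an environment variable that the shared pytest step reads. On GitHub Actions, a variable only reaches later steps of a job if the NAME=value pair is appended to the file that $GITHUB_ENV points to. Below is a minimal sketch of that pattern, assuming bash and the standard $GITHUB_ENV mechanism; it is illustrative only and not necessarily the exact upstream step.

      # pick LLVM=1 or CLANG=1 from the matrix entry and export it to later steps in this job
      - name: Set env
        shell: bash
        run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' }}\n" >> "$GITHUB_ENV"
      # subsequent steps now see LLVM=1 (or CLANG=1) without hard-coding a backend per job
      - name: Run pytest (${{ matrix.backend }})
        shell: bash
        run: python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20

Keeping one step list and letting the matrix assign each backend its own runner is the mechanism this commit uses to spread the suite across more machines and lower the per-job timeouts.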