From a0965ee1988cb4bfd08e829d4cff1829fa591ff0 Mon Sep 17 00:00:00 2001 From: cheeetoo <102839837+cheeetoo@users.noreply.github.com> Date: Sun, 23 Jul 2023 15:00:56 -0500 Subject: [PATCH] CI < 5 minutes (#1252) * models matrix * fix typo and install gpu deps * install llvm deps if needed * fix * testops with cuda * remove pip cache since not work * cuda env * install cuda deps * maybe it will work now * i can't read * all tests in matrix * trim down more * opencl stuff in matrix * opencl pip cache * test split * change cuda test exclusion * test * fix cuda maybe * add models * add more n=auto * third thing * fix bug * cache pip more * change name * update tests * try again cause why not * balance * try again... * try apt cache for cuda * try on gpu: * try cuda again * update packages step * replace libz-dev with zlib1g-dev * only cache cuda * why error * fix gpuocelot bug * apt cache err * apt cache to slow? * opt and image in single runner * add a couple n=autos * remove test matrix * try cuda apt cache again * libz-dev -> zlib1g-dev * remove -s since not supported by xdist * the cache takes too long and doesn't work * combine webgpu and metal tests * combine imagenet to c and cpu tests * torch tests with linters * torch back by itself * small windows clang test with torch tests * fix a goofy windows bug * im dumb * bro * clang with linters * fix pylint error * linter not work on windows * try with clang again * clang and imagenet? * install deps * fix * fix quote * clang by itself (windows too slow) * env vars for imagenet * cache pip for metal and webgpu tests * try torch with metal and webgpu * doesn't work, too long * remove -v * try -n=logical * don't use logical * revert accidental thing * remove some prints unless CI * fix print unless CI * ignore speed tests for slow tests * clang windows in matrix (ubuntu being tested in imagenet->c test) * try manual pip cache * fix windows pip cache path * all manual pip cache * fix pip cache dir for macos * print_ci function in helpers * CI as variable, no print_ci * missed one * cuda tests with docker image * remove setup-python action for cuda * python->python3? * remove -s -v * try fix pip cache * maybe fix * try to fix pip cache * is this the path? * maybe cache pip * try again * create wheels dir * ? * cuda pip deps in dockerfile * disable pip cache for clang * image from ghcr instead of docker hub * why is clang like this * fast deps * try use different caches * remove the fast thing * try with lighter image * remove setup python for cuda * small docker and cuda fast deps * ignore a few more tests * cool docker thing (maybe) * oops * quotes * fix docker command * fix bug * ignore train efficientnet test * remove dockerfile (docker stuff takes too long) * remove docker stuff and normal cuda * oops * ignore the tests for cuda * does this work * ignore test_train on slow backends * add space * llvm ignore same tests as cuda * nvm * ignore lr scheduler tests * get some stats * fix ignore bug * remove extra ' * remove and * ignore test for llvm * change ignored tests and durationon all backends * fix * and -> or * ignore some more cuda tests * finally? * does this fix it * remove durations=0 * add some more tests to llvm * make last pytest more readable * fix * don't train efficientnet on cpu * try w/out pip cache * pip cache seems to be generally better * pytest file markers * try apt fast for cuda * use quick install for apt-fast * apt-fast not worth * apt-get to apt * fix typo * suppress warnings * register markers * disable debug on fuzz tests * change marker names * apt update and apt install in one command * update marker names in test.yml * webgpu pytest marker --- .github/workflows/test.yml | 346 +++++++++++--------------- pytest.ini | 2 + test/external/external_test_yolov8.py | 2 +- test/extra/test_lr_scheduler.py | 3 + test/models/test_end2end.py | 11 +- test/models/test_mnist.py | 3 + test/models/test_onnx.py | 30 ++- test/models/test_train.py | 3 + test/test_assign.py | 3 + test/test_conv.py | 3 + test/test_conv_shapetracker.py | 3 + test/test_custom_function.py | 3 + test/test_dtype.py | 2 +- test/test_jit.py | 3 + test/test_net_speed.py | 3 + test/test_nn.py | 3 + test/test_ops.py | 13 +- test/test_optim.py | 3 + test/test_randomness.py | 3 + test/test_specific_conv.py | 3 + test/test_speed_v_torch.py | 7 +- test/test_tensor.py | 3 + test/unit/test_example.py | 8 +- 23 files changed, 237 insertions(+), 226 deletions(-) create mode 100644 pytest.ini diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 83f137e024..be1076926b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,6 +18,11 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: linting - name: Install dependencies run: pip install -e '.[linting,testing]' --extra-index-url https://download.pytorch.org/whl/cpu - name: Repo line count @@ -31,12 +36,12 @@ jobs: - name: Run mypy run: mypy tinygrad/ --ignore-missing-imports --check-untyped-defs --explicit-package-bases --warn-unreachable - name: Install SLOCCount - run: sudo apt-get install sloccount + run: sudo apt install sloccount - name: Check <5000 lines run: sloccount tinygrad test examples extra; if [ $(sloccount tinygrad | sed -n 's/.*Total Physical Source Lines of Code (SLOC)[ ]*= \([^ ]*\).*/\1/p' | tr -d ',') -gt 5000 ]; then exit 1; fi - testcpu: - name: CPU Tests + testcpuimagenet: + name: CPU and ImageNet to C Tests runs-on: ubuntu-latest timeout-minutes: 20 @@ -47,6 +52,11 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: testing - name: Install Dependencies run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu - name: Test Docs @@ -54,49 +64,11 @@ jobs: - name: Test Quickstart run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python3 quickstart.py - name: Run Pytest - run: python -m pytest -s -v -n=auto test/ + run: python -m pytest -n=auto test/ -k "not (test_efficientnet and models/test_train.py)" - name: Fuzz Test symbolic - run: DEBUG=1 python test/external/fuzz_symbolic.py + run: python test/external/fuzz_symbolic.py - name: Fuzz Test shapetracker - run: PYTHONPATH="." DEBUG=1 python test/external/fuzz_shapetracker.py - - testwebgpu: - name: WebGPU Tests - runs-on: macos-13 - - steps: - - name: Checkout Code - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Install Dependencies - run: pip install -e '.[testing,webgpu]' --extra-index-url https://download.pytorch.org/whl/cpu - # - name: Set Env - # run: printf "WEBGPU=1\nWGPU_BACKEND_TYPE=D3D12\n" >> $GITHUB_ENV - - name: Run Pytest - run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -s -v -n=auto test/test_ops.py test/test_speed_v_torch.py test/test_nn.py test/test_jit.py test/test_randomness.py test/test_tensor.py test/test_assign.py test/test_conv.py test/test_nn.py test/test_custom_function.py test/test_conv_shapetracker.py - - name: Build WEBGPU Efficientnet - run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.webgpu.compile_webgpu - # - name: Install Puppeteer - # run: npm install puppeteer - # - name: Run Efficientnet - # run: node test/test_webgpu.js - testimagenet: - name: ImageNet to C Compile Test - runs-on: ubuntu-latest - timeout-minutes: 20 - - steps: - - name: Checkout Code - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Install Dependencies - run: pip install -e . + run: PYTHONPATH="." python test/external/fuzz_shapetracker.py - name: Compile EfficientNet to C run: PYTHONPATH="." CLANG=1 python3 examples/compile_efficientnet.py > recognize.c - name: Compile C to native @@ -104,44 +76,6 @@ jobs: - name: Test EfficientNet run: curl https://media.istockphoto.com/photos/hen-picture-id831791190 | ./recognize | grep hen - testllvm: - name: LLVM Tests - runs-on: ubuntu-latest - timeout-minutes: 20 - - steps: - - name: Checkout Code - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Install Dependencies - run: pip install -e '.[llvm,testing]' --extra-index-url https://download.pytorch.org/whl/cpu - - name: Run Pytest - run: ENABLE_METHOD_CACHE=1 LLVM=1 python -m pytest -s -v -n=auto test/ - - testclang: - strategy: - matrix: - os: [ubuntu-latest, windows-latest] - runs-on: ${{ matrix.os }} - name: CLANG Tests ${{ matrix.os }} (w method cache) - - steps: - - name: Checkout Code - uses: actions/checkout@v3 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Install Dependencies - run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu - - name: Set env - run: printf "CI=1\nCLANG=1\nENABLE_METHOD_CACHE=1" >> $GITHUB_ENV - - name: Run Pytest - run: python -m pytest -s -v -n=auto test/ - testtorch: name: Torch Tests runs-on: ubuntu-latest @@ -154,79 +88,72 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.8 + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: testing - name: Install Dependencies run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu - name: Run Pytest - run: TORCH=1 python -m pytest -s -v -n=auto test/ + run: TORCH=1 python -m pytest -n=auto test/ - name: Run ONNX - run: TORCH=1 python -m pytest test/external/external_test_onnx_backend.py --tb=no --disable-warnings || true - - testgpu: - name: GPU Tests - runs-on: ubuntu-20.04 - timeout-minutes: 20 - - steps: - - name: Checkout Code - uses: actions/checkout@v3 - - name: Update packages - run: | - wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list - sudo apt-get update - - name: Install OpenCL - #run: sudo apt-get install -y pocl-opencl-icd - run: sudo apt-get install -y intel-oneapi-runtime-compilers intel-oneapi-runtime-opencl - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Install Dependencies - run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu - - name: Run Optimizer Test (OPT 2 and 3) - run: | - PYTHONPATH="." OPT=2 GPU=1 python test/external/external_test_opt.py - PYTHONPATH="." OPT=3 GPU=1 python test/external/external_test_opt.py - - name: Run Pytest (default) - run: GPU=1 python -m pytest -s -v -n=auto test/ + run: TORCH=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --tb=no --disable-warnings || true testopencl: - name: openpilot (OpenCL) Test + strategy: + matrix: + task: [optimage, openpilot] + name: ${{ matrix.task=='optimage'&&'GPU OPT and IMAGE Tests'||'openpilot (OpenCL) Tests'}} runs-on: ubuntu-20.04 timeout-minutes: 20 steps: - - name: Checkout Code - uses: actions/checkout@v3 - - name: Update packages - run: | - wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list - sudo apt-get update - - name: Install OpenCL - #run: sudo apt-get install -y pocl-opencl-icd - run: sudo apt-get install -y intel-oneapi-runtime-compilers intel-oneapi-runtime-opencl - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - - name: Install Dependencies - run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu - - name: Test openpilot model compile and size - run: | - DEBUG=2 ALLOWED_KERNEL_COUNT=199 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python3 openpilot/compile.py - python3 -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000' - - name: Test GPU IMAGE ops - run: | - GPU=1 IMAGE=1 python3 test/test_ops.py - FORWARD_ONLY=1 GPU=1 IMAGE=2 python3 test/test_ops.py - - name: Test openpilot model correctness (float32) - run: DEBUGCL=1 GPU=1 IMAGE=2 python3 openpilot/compile.py - - name: Test tensor core ops - run: GPU=1 TC=2 python3 test/test_ops.py + - name: Checkout Code + uses: actions/checkout@v3 + - name: Update packages + run: | + wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + sudo apt update + - name: Install OpenCL + #run: sudo apt-get install -y pocl-opencl-icd + run: sudo apt install -y intel-oneapi-runtime-compilers intel-oneapi-runtime-opencl + - name: Set up Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: testing + - name: Install Dependencies + run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu + - if: ${{ matrix.task == 'optimage' }} + name: Run Optimizer Test (OPT 2 and 3) + run: | + PYTHONPATH="." OPT=2 GPU=1 python -m pytest -n=auto test/external/external_test_opt.py + PYTHONPATH="." OPT=3 GPU=1 python -m pytest -n=auto test/external/external_test_opt.py + - if: ${{ matrix.task == 'optimage'}} + name: Test GPU IMAGE ops + run: | + GPU=1 IMAGE=1 python3 -m pytest -n=auto test/test_ops.py + FORWARD_ONLY=1 GPU=1 IMAGE=2 python3 -m pytest -n=auto test/test_ops.py + - if: ${{ matrix.task == 'openpilot' }} + name: Test openpilot model compile and size + run: | + DEBUG=2 ALLOWED_KERNEL_COUNT=199 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python3 openpilot/compile.py + python3 -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000' + - if: ${{ matrix.task == 'openpilot' }} + name: Test openpilot model correctness (float32) + run: DEBUGCL=1 GPU=1 IMAGE=2 python3 openpilot/compile.py + - if: ${{ matrix.task == 'openpilot' }} + name: Test tensor core ops + run: GPU=1 TC=2 python3 -m pytest -n=auto test/test_ops.py - testmetal: - name: Metal Tests + testmetalwebgpu: + name: Metal and WebGPU Tests runs-on: macos-13 timeout-minutes: 20 @@ -237,19 +164,27 @@ jobs: uses: actions/setup-python@v4 with: python-version: 3.11 + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/Library/Caches/pip + key: metalwebgpu - name: Install Dependencies - run: pip install -e '.[metal,testing]' + run: pip install -e '.[metal,webgpu,testing]' --extra-index-url https://download.pytorch.org/whl/cpu - name: Test LLaMA compile speed run: PYTHONPATH="." METAL=1 python3 test/external/external_test_speed_llama.py #- name: Run dtype test # run: DEBUG=4 METAL=1 python -m pytest test/test_dtype.py # dtype test has issues on test_half_to_int8 - - name: Run ops test + - name: Run metal ops test run: DEBUG=2 METAL=1 python -m pytest test/test_ops.py - name: Run JIT test run: DEBUG=2 METAL=1 python -m pytest test/test_jit.py # TODO: why not testing the whole test/? - + - name: Run webgpu pytest + run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto -m 'webgpu' + - name: Build WEBGPU Efficientnet + run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.webgpu.compile_webgpu testdocker: name: Docker Test @@ -264,58 +199,73 @@ jobs: - name: Test Docker run: docker run --rm tinygrad /usr/bin/env python3 -c "from tinygrad.tensor import Tensor; print(Tensor.eye(3).numpy())" + tests: + strategy: + matrix: + backend: [llvm, clang, gpu, cuda] - testcuda: - name: (emulated) cuda test - runs-on: ubuntu-22.04 + name: Tests on (${{ matrix.backend }}) + runs-on: ${{ matrix.backend == 'gpu' && 'ubuntu-20.04' || matrix.backend=='clang'&&'windows-latest'|| 'ubuntu-latest' }} timeout-minutes: 20 steps: - - name: Checkout Code - uses: actions/checkout@v3 - - name: Update packages - run: | - export DEBIAN_FRONTEND=noninteractive - sudo apt-get update -y - - name: Install packages - run: sudo apt-get install -y --no-install-recommends git g++ cmake ninja-build llvm-15-dev libz-dev libglew-dev flex bison libfl-dev libboost-thread-dev libboost-filesystem-dev nvidia-cuda-toolkit-gcc - - name: Cache gpuocelot - id: cache-build - uses: actions/cache@v3 - env: - cache-name: cache-gpuocelot-build - with: - path: ${{ github.workspace }}/gpuocelot/ocelot/ - key: ubuntu22.04-gpuocelot-19626fc00b6ee321638c3111074269c69050e091 - restore-keys: | - ubuntu22.04-gpuocelot-19626fc00b6ee321638c3111074269c69050e091 - - if: ${{ steps.cache-build.outputs.cache-hit != 'true' }} - name: Clone gpuocelot - uses: actions/checkout@v3 - with: - repository: gpuocelot/gpuocelot - ref: 19626fc00b6ee321638c3111074269c69050e091 - path: ${{ github.workspace }}/gpuocelot - submodules: true - - if: ${{ steps.cache-build.outputs.cache-hit != 'true' }} - name: Compile gpuocelot - run: | - cd ${{ github.workspace }}/gpuocelot/ocelot - mkdir build - cd build - cmake .. -Wno-dev -G Ninja -DOCELOT_BUILD_TOOLS=OFF - ninja - - name: Install gpuocelot - run: | - cd ${{ github.workspace }}/gpuocelot/ocelot/build - sudo ninja install - - name: Set up Python 3.8 - uses: actions/setup-python@v4 - with: - python-version: 3.8 - cache: 'pip' - cache-dependency-path: setup.py - - name: Install tinygrad dependencies - run: pip install -e '.[testing, cuda]' --extra-index-url https://download.pytorch.org/whl/cpu - - name: Run pytest - run: FORWARD_ONLY=1 JIT=1 OPT=2 CUDA=1 CUDACPU=1 python -m pytest -s -v -n=auto test --ignore=test/external --ignore=test/models --ignore=test/test_speed_v_torch.py --ignore=test/test_specific_conv.py --ignore=test/test_net_speed.py --ignore=test/test_nn.py -k "not half" + - name: Checkout Code + uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: Cache pip + uses: actions/cache@v3 + with: + path: ${{ matrix.backend=='clang'&&'~\AppData\Local\pip\cache'||'~/.cache/pip' }} + key: ${{ matrix.backend }} + - name: Set env + run: printf "${{ matrix.backend == 'llvm' && 'ENABLE_METHOD_CACHE=1\nLLVM=1' || matrix.backend == 'clang' && 'CLANG=1\nENABLED_METHOD_CACHE=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n'}}" >> $GITHUB_ENV + - name: Install packages (gpu) + if: matrix.backend == 'gpu' + run: | + wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list + sudo apt update && \ + sudo apt install -y intel-oneapi-runtime-compilers intel-oneapi-runtime-opencl + - name: Install packages (cuda) + if: matrix.backend == 'cuda' + run: | + export DEBIAN_FRONTEND=noninteractive + sudo apt update -y && \ + sudo apt install -y --no-install-recommends git g++ cmake ninja-build llvm-15-dev zlib1g-dev libglew-dev flex bison libfl-dev libboost-thread-dev libboost-filesystem-dev nvidia-cuda-toolkit-gcc + - name: Cache gpuocelot + if: matrix.backend == 'cuda' + id: cache-build + uses: actions/cache@v3 + env: + cache-name: cache-gpuocelot-build + with: + path: ${{ github.workspace }}/gpuocelot/ocelot/ + key: ubuntu22.04-gpuocelot-19626fc00b6ee321638c3111074269c69050e091 + restore-keys: | + ubuntu22.04-gpuocelot-19626fc00b6ee321638c3111074269c69050e091 + - name: Clone/compile gpuocelot + if: matrix.backend == 'cuda' && steps.cache-build.outputs.cache-hit != 'true' + run: | + git clone --recurse-submodules https://github.com/gpuocelot/gpuocelot.git ${{ github.workspace }}/gpuocelot + cd ${{ github.workspace }}/gpuocelot/ocelot + git checkout 19626fc00b6ee321638c3111074269c69050e091 + mkdir build + cd build + cmake .. -Wno-dev -G Ninja -DOCELOT_BUILD_TOOLS=OFF + ninja + - name: Install gpuocelot + if: matrix.backend == 'cuda' + run: | + cd ${{ github.workspace }}/gpuocelot/ocelot/build + sudo ninja install + - name: Install dependencies + run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='cuda'&&',cuda'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu + - name: Run pytest (not cuda) + if: matrix.backend!='cuda' + run: python -m pytest -n=auto test/ -k '${{matrix.backend=='llvm'&&'not (test_nn.py and test_conv_transpose2d)'||'test'}}' -m 'not exclude_${{matrix.backend}}' + - name: Run pytest (cuda) + if: matrix.backend=='cuda' + run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors) and not (test_conv2d and test_tensor.py)' -m 'not exclude_cuda' --ignore=test/external --ignore=test/models \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..0f6e52a427 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +markers = ['exclude_cuda', 'exclude_gpu', 'exclude_clang', 'webgpu'] \ No newline at end of file diff --git a/test/external/external_test_yolov8.py b/test/external/external_test_yolov8.py index 2053f2203d..e250c59c2e 100644 --- a/test/external/external_test_yolov8.py +++ b/test/external/external_test_yolov8.py @@ -73,5 +73,5 @@ class TestYOLOv8(unittest.TestCase): np.testing.assert_allclose(onnx_output[0], tiny_output.cpu().numpy(), atol=5e-4, rtol=0.025) if __name__ == '__main__': - unittest.main() + unittest.main() \ No newline at end of file diff --git a/test/extra/test_lr_scheduler.py b/test/extra/test_lr_scheduler.py index 1e39c3d4e0..283652b48a 100644 --- a/test/extra/test_lr_scheduler.py +++ b/test/extra/test_lr_scheduler.py @@ -7,6 +7,9 @@ from tinygrad.nn.optim import Adam from extra.lr_scheduler import MultiStepLR, ReduceLROnPlateau, CosineAnnealingLR, OneCycleLR from extra.training import train, evaluate from extra.datasets import fetch_mnist +import pytest + +pytestmark = [pytest.mark.exclude_cuda, pytest.mark.exclude_gpu] np.random.seed(1337) Tensor.manual_seed(1337) diff --git a/test/models/test_end2end.py b/test/models/test_end2end.py index 09a7f0ed19..b206e50bc7 100644 --- a/test/models/test_end2end.py +++ b/test/models/test_end2end.py @@ -6,13 +6,14 @@ from tinygrad.state import get_parameters, get_state_dict from tinygrad.nn import optim, Linear, Conv2d, BatchNorm2d from tinygrad.tensor import Tensor from extra.datasets import fetch_mnist +from tinygrad.helpers import CI def compare_tiny_torch(model, model_torch, X, Y): Tensor.training = True model_torch.train() model_state_dict = get_state_dict(model) for k,v in model_torch.named_parameters(): - print(f"initting {k} from torch") + if not CI: print(f"initting {k} from torch") model_state_dict[k].assign(Tensor(v.detach().numpy())).realize() optimizer = optim.SGD(get_parameters(model), lr=0.01) @@ -23,11 +24,11 @@ def compare_tiny_torch(model, model_torch, X, Y): out = model(X) loss = (out * Y).mean() - print(loss.realize().numpy()) + if not CI: print(loss.realize().numpy()) out_torch = model_torch(torch.Tensor(X.numpy())) loss_torch = (out_torch * torch.Tensor(Y.numpy())).mean() - print(loss_torch.detach().numpy()) + if not CI: print(loss_torch.detach().numpy()) # assert losses match np.testing.assert_allclose(loss.realize().numpy(), loss_torch.detach().numpy(), atol=1e-4) @@ -41,7 +42,7 @@ def compare_tiny_torch(model, model_torch, X, Y): for k,v in list(model_torch.named_parameters())[::-1]: g = model_state_dict[k].grad.numpy() gt = v.grad.detach().numpy() - print("testing grads", k) + if not CI: print("testing grads", k) np.testing.assert_allclose(g, gt, atol=1e-3, err_msg=f'grad mismatch {k}') # take the steps @@ -50,7 +51,7 @@ def compare_tiny_torch(model, model_torch, X, Y): # assert weights match (they don't!) for k,v in model_torch.named_parameters(): - print("testing weight", k) + if not CI: print("testing weight", k) np.testing.assert_allclose(model_state_dict[k].numpy(), v.detach().numpy(), atol=1e-3, err_msg=f'weight mismatch {k}') def get_mnist_data(): diff --git a/test/models/test_mnist.py b/test/models/test_mnist.py index e990f3a0e5..fca4c85084 100644 --- a/test/models/test_mnist.py +++ b/test/models/test_mnist.py @@ -6,6 +6,9 @@ from tinygrad.tensor import Tensor, Device from tinygrad.nn import optim, BatchNorm2d from extra.training import train, evaluate from extra.datasets import fetch_mnist +import pytest + +pytestmark = [pytest.mark.exclude_gpu, pytest.mark.exclude_clang] # load the mnist dataset X_train, Y_train, X_test, Y_test = fetch_mnist() diff --git a/test/models/test_onnx.py b/test/models/test_onnx.py index e09be76f00..156dd52131 100644 --- a/test/models/test_onnx.py +++ b/test/models/test_onnx.py @@ -8,6 +8,10 @@ import onnx from extra.utils import fetch, temp from extra.onnx import get_run_onnx from tinygrad.tensor import Tensor +from tinygrad.helpers import CI +import pytest + +pytestmark = [pytest.mark.exclude_gpu, pytest.mark.exclude_clang] def run_onnx_torch(onnx_model, inputs): import torch @@ -48,22 +52,24 @@ class TestOnnxModel(unittest.TestCase): mt2 = time.monotonic() tinygrad_out = tinygrad_out.numpy() et = time.monotonic() - print(f"ran openpilot model in {(et-st)*1000.0:.2f} ms, waited {(mt2-mt)*1000.0:.2f} ms for realize, {(et-mt2)*1000.0:.2f} ms for GPU queue") + if not CI: print(f"ran openpilot model in {(et-st)*1000.0:.2f} ms, waited {(mt2-mt)*1000.0:.2f} ms for realize, {(et-mt2)*1000.0:.2f} ms for GPU queue") - import cProfile - import pstats - inputs = get_inputs() - pr = cProfile.Profile(timer=time.perf_counter_ns, timeunit=1e-6) - pr.enable() + if not CI: + import cProfile + import pstats + inputs = get_inputs() + pr = cProfile.Profile(timer=time.perf_counter_ns, timeunit=1e-6) + pr.enable() tinygrad_out = run_onnx(inputs)['outputs'] tinygrad_out.realize() tinygrad_out = tinygrad_out.numpy() - pr.disable() - stats = pstats.Stats(pr) - stats.dump_stats(temp("net.prof")) - os.system(f"flameprof {temp('net.prof')} > {temp('prof.svg')}") - ps = stats.sort_stats(pstats.SortKey.TIME) - ps.print_stats(30) + if not CI: + pr.disable() + stats = pstats.Stats(pr) + stats.dump_stats(temp("net.prof")) + os.system(f"flameprof {temp('net.prof')} > {temp('prof.svg')}") + ps = stats.sort_stats(pstats.SortKey.TIME) + ps.print_stats(30) def test_openpilot_model(self): dat = fetch(OPENPILOT_MODEL) diff --git a/test/models/test_train.py b/test/models/test_train.py index 6ea30ab0c8..3f58358564 100644 --- a/test/models/test_train.py +++ b/test/models/test_train.py @@ -11,6 +11,9 @@ from models.efficientnet import EfficientNet from models.transformer import Transformer from models.vit import ViT from models.resnet import ResNet18 +import pytest + +pytestmark = pytest.mark.exclude_gpu BS = getenv("BS", 2) diff --git a/test/test_assign.py b/test/test_assign.py index d979901cb5..37b322168c 100644 --- a/test/test_assign.py +++ b/test/test_assign.py @@ -5,6 +5,9 @@ from tinygrad.tensor import Tensor from tinygrad.lazy import LAZY from tinygrad.ops import GlobalCounters from tinygrad.graph import nm +import pytest + +pytestmark = pytest.mark.webgpu N = 200 # has to be bigger than the cache to fail diff --git a/test/test_conv.py b/test/test_conv.py index 433a705345..8042754b2f 100644 --- a/test/test_conv.py +++ b/test/test_conv.py @@ -1,6 +1,9 @@ import unittest import numpy as np from tinygrad.tensor import Tensor +import pytest + +pytestmark = [pytest.mark.exclude_cuda, pytest.mark.webgpu] class TestConv(unittest.TestCase): def test_simple(self): diff --git a/test/test_conv_shapetracker.py b/test/test_conv_shapetracker.py index 7975d3aebb..1a1219b217 100644 --- a/test/test_conv_shapetracker.py +++ b/test/test_conv_shapetracker.py @@ -3,6 +3,9 @@ import unittest from tinygrad.tensor import Tensor, Device from tinygrad.nn import Conv2d from tinygrad.ops import GlobalCounters +import pytest + +pytestmark = pytest.mark.webgpu #@unittest.skipUnless(Device.DEFAULT == "GPU", "Only GPU supports cache") @unittest.skip("with JIT changes, you only get the raw buffer") diff --git a/test/test_custom_function.py b/test/test_custom_function.py index 2583ff79c8..e35a53c02b 100644 --- a/test/test_custom_function.py +++ b/test/test_custom_function.py @@ -11,6 +11,9 @@ from tinygrad.helpers import prod, dtypes from tinygrad.lazy import LazyBuffer, create_lazybuffer, Device from tinygrad.ops import ASTRunner from tinygrad.shape.shapetracker import ShapeTracker +import pytest + +pytestmark = pytest.mark.webgpu # we don't always have GPU support, so the type signature is the abstract CompiledBuffer instead of GPUBuffer def atan2_gpu(ret:LazyBuffer, a:LazyBuffer, b:LazyBuffer): diff --git a/test/test_dtype.py b/test/test_dtype.py index 301c39e0a7..608a959b47 100644 --- a/test/test_dtype.py +++ b/test/test_dtype.py @@ -1,6 +1,6 @@ import unittest import numpy as np -from tinygrad.helpers import getenv, DType, DEBUG +from tinygrad.helpers import getenv, DType, DEBUG, CI from tinygrad.lazy import Device from tinygrad.tensor import Tensor, dtypes from typing import List, Optional diff --git a/test/test_jit.py b/test/test_jit.py index fa53e44d02..6c1d929d0d 100644 --- a/test/test_jit.py +++ b/test/test_jit.py @@ -3,6 +3,9 @@ import unittest import numpy as np from tinygrad.tensor import Tensor, Device from tinygrad.jit import TinyJit, JIT_SUPPORTED_DEVICE +import pytest + +pytestmark = pytest.mark.webgpu # NOTE: METAL fails, might be platform and optimization options dependent. @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and Device.DEFAULT not in ["METAL", "WEBGPU"], f"no JIT on {Device.DEFAULT}") diff --git a/test/test_net_speed.py b/test/test_net_speed.py index e382783505..69675b1081 100644 --- a/test/test_net_speed.py +++ b/test/test_net_speed.py @@ -5,6 +5,9 @@ import pstats import unittest import torch from tinygrad.tensor import Tensor, Device +import pytest + +pytestmark = [pytest.mark.exclude_cuda, pytest.mark.exclude_gpu, pytest.mark.exclude_clang] def start_profile(): import time diff --git a/test/test_nn.py b/test/test_nn.py index c31d780bec..dc91446550 100755 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -7,6 +7,9 @@ from tinygrad.jit import TinyJit from tinygrad.tensor import Tensor, Device from tinygrad.nn import BatchNorm2d, Conv1d, ConvTranspose1d, Conv2d, ConvTranspose2d, Linear, GroupNorm, LayerNorm, LayerNorm2d, Embedding, InstanceNorm import torch +import pytest + +pytestmark = [pytest.mark.exclude_cuda, pytest.mark.webgpu] class TestNN(unittest.TestCase): diff --git a/test/test_ops.py b/test/test_ops.py index efb04e6dd2..067f6d2198 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -4,8 +4,15 @@ import math import numpy as np import unittest from tinygrad.tensor import Tensor -from tinygrad.helpers import getenv, IMAGE, DEBUG +from tinygrad.helpers import getenv, IMAGE, DEBUG, CI from tinygrad.lazy import Device +import pytest + +pytestmark = pytest.mark.webgpu + +if CI: + import warnings + warnings.filterwarnings("ignore", message="Non-empty compiler output encountered") FORWARD_ONLY = getenv("FORWARD_ONLY", 0) PRINT_TENSORS = getenv("PRINT_TENSORS", 0) @@ -49,7 +56,7 @@ def helper_test_op(shps, torch_fxn, tinygrad_fxn=None, atol=1e-6, rtol=1e-3, gra for i, (t, tt) in enumerate(zip(ts, tst)): compare(f"backward pass tensor {i}", tt.grad.numpy(), t.grad.detach().numpy(), atol=grad_atol, rtol=grad_rtol) - print("\ntesting %40r torch/tinygrad fp: %.2f / %.2f ms bp: %.2f / %.2f ms " % (shps, torch_fp*1000, tinygrad_fp*1000, torch_fbp*1000, tinygrad_fbp*1000), end="") + if not CI: print("\ntesting %40r torch/tinygrad fp: %.2f / %.2f ms bp: %.2f / %.2f ms " % (shps, torch_fp*1000, tinygrad_fp*1000, torch_fbp*1000, tinygrad_fbp*1000), end="") def prepare_test_op(a, b, shps, vals): torch.manual_seed(0) @@ -68,7 +75,7 @@ class TestOps(unittest.TestCase): with self.assertRaises(expected) as tinygrad_cm: tinygrad_fxn(*tst) if exact: self.assertEqual(str(torch_cm.exception), str(tinygrad_cm.exception)) - print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="") + if not CI: print("\ntesting %40r torch/tinygrad exception: %s / %s" % (shps, torch_cm.exception, tinygrad_cm.exception), end="") def test_full_like(self): a = Tensor([[1,2,3],[4,5,6]]) diff --git a/test/test_optim.py b/test/test_optim.py index b9bef30a21..c973873fdc 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -5,6 +5,9 @@ import torch import unittest from tinygrad.tensor import Tensor from tinygrad.nn.optim import Adam, SGD, AdamW +import pytest + +pytestmark = pytest.mark.exclude_cuda np.random.seed(1337) x_init = np.random.randn(1,4).astype(np.float32) diff --git a/test/test_randomness.py b/test/test_randomness.py index 91a4a38b05..3a3dce38b0 100644 --- a/test/test_randomness.py +++ b/test/test_randomness.py @@ -4,6 +4,9 @@ import numpy as np import torch from tinygrad.tensor import Tensor import tinygrad.nn as nn +import pytest + +pytestmark = pytest.mark.webgpu # https://gist.github.com/devries/11405101 def ksprob(a): diff --git a/test/test_specific_conv.py b/test/test_specific_conv.py index 1737a78d45..8e14ccd6e1 100644 --- a/test/test_specific_conv.py +++ b/test/test_specific_conv.py @@ -2,8 +2,11 @@ import unittest from tinygrad.tensor import Tensor from tinygrad.helpers import dtypes from tinygrad.lazy import Device +import pytest # similar to test/external/external_test_gpu_ast.py, but universal +pytestmark = pytest.mark.exclude_cuda + class TestSpecific(unittest.TestCase): # from openpilot diff --git a/test/test_speed_v_torch.py b/test/test_speed_v_torch.py index 5264f74826..3c07f1439f 100644 --- a/test/test_speed_v_torch.py +++ b/test/test_speed_v_torch.py @@ -14,8 +14,11 @@ from tinygrad.lazy import Device from tinygrad.ops import GlobalCounters from tinygrad.tensor import Tensor from tinygrad.nn import Conv2d -from tinygrad.helpers import colored, getenv, DEBUG +from tinygrad.helpers import colored, getenv, DEBUG, CI from tinygrad.jit import TinyJit +import pytest + +pytestmark = [pytest.mark.exclude_cuda, pytest.mark.exclude_gpu, pytest.mark.exclude_clang, pytest.mark.webgpu] IN_CHANS = [int(x) for x in getenv("IN_CHANS", "4,16,64").split(",")] @@ -93,7 +96,7 @@ def helper_test_generic(name, f1, f1_args, f2, f2_args): desc = "faster" if et_torch > et_tinygrad else "slower" flops = save_ops*1e-6 mem = save_mem*1e-6 - print(f"\r{name:42s} {et_torch:7.2f} ms ({flops/et_torch:8.2f} GFLOPS {mem/et_torch:8.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:8.2f} GFLOPS {mem/et_tinygrad:8.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") + if not CI: print(f"\r{name:42s} {et_torch:7.2f} ms ({flops/et_torch:8.2f} GFLOPS {mem/et_torch:8.2f} GB/s) in torch, {et_tinygrad:7.2f} ms ({flops/et_tinygrad:8.2f} GFLOPS {mem/et_tinygrad:8.2f} GB/s) in tinygrad, {colorize_float(et_tinygrad/et_torch)} {desc} {flops:10.2f} MOPS {mem:8.2f} MB") np.testing.assert_allclose(val_tinygrad, val_torch, atol=1e-4, rtol=1e-3) def helper_test_conv(bs, in_chans, out_chans, kernel_size, img_size_y, img_size_x): diff --git a/test/test_tensor.py b/test/test_tensor.py index 5c745a6307..b903cf09ff 100644 --- a/test/test_tensor.py +++ b/test/test_tensor.py @@ -6,6 +6,9 @@ import itertools from tinygrad.tensor import Tensor, Device from tinygrad.helpers import dtypes from extra.gradcheck import numerical_jacobian, jacobian, gradcheck +import pytest + +pytestmark = pytest.mark.webgpu x_init = np.random.randn(1,3).astype(np.float32) U_init = np.random.randn(3,3).astype(np.float32) diff --git a/test/unit/test_example.py b/test/unit/test_example.py index db5e7a7aea..dc6567607d 100644 --- a/test/unit/test_example.py +++ b/test/unit/test_example.py @@ -2,22 +2,22 @@ import unittest import numpy as np from tinygrad.lazy import Device from tinygrad.tensor import Tensor -from tinygrad.helpers import getenv +from tinygrad.helpers import getenv, CI def multidevice_test(fxn): exclude_devices = getenv("EXCLUDE_DEVICES", "").split(",") def ret(self): for device in Device._buffers: if device in ["DISK", "FAKE"]: continue - print(device) + if not CI: print(device) if device in exclude_devices: - print(f"WARNING: {device} test is excluded") + if not CI: print(f"WARNING: {device} test is excluded") continue with self.subTest(device=device): try: Device[device] except Exception: - print(f"WARNING: {device} test isn't running") + if not CI: print(f"WARNING: {device} test isn't running") continue fxn(self, device) return ret