Files
tinygrad/.github/workflows/test.yml
uuuvn dba073e5c0 Less messy broken graph on paravirtualized metal workaround (#10182)
* Less messy broken graph on paravirtualized metal workaround

GitHub CI macOS runners use paravirtualized metal which is broken with
graph (some comments say that ICB in particular is broken but in my
testing it was fine sometimes, but other times hitting an assert inside
metal's code related to resouces, so not sure).

> Assertion failed: (resource != nil), function -[IOGPUMetalResource initWithResource:], file IOGPUMetalResource.m, line 458.

This can be reproduced locally with any virtualization software (like utm)
that can create macOS VMs with apple's own virtualization framework.

* unused import
2025-05-06 20:41:02 +03:00

862 lines
38 KiB
YAML

name: Unit Tests
env:
# increment this when downloads substantially change to avoid the internet
DOWNLOAD_CACHE_VERSION: '9'
CAPTURE_PROCESS_REPLAY: 1
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
on:
push:
branches:
- master
pull_request:
workflow_dispatch:
jobs:
llvmspeed:
name: LLVM Speed
runs-on: ubuntu-24.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: llvm-speed
deps: testing_minimal
llvm: 'true'
- name: External Benchmark Schedule
run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py
- name: Speed Test
run: LLVM=1 python3 test/test_speed_v_torch.py
- name: Speed Test (BEAM=2)
run: BEAM=2 LLVM=1 python3 test/test_speed_v_torch.py
docs:
name: Docs
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
deps: docs
pydeps: "capstone"
- name: Use as an external package
run: |
mkdir $HOME/test_external_dir
cd $HOME/test_external_dir
python -m venv venv
source venv/bin/activate
pip install $GITHUB_WORKSPACE
python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
# Test using VIZ=1 after package installation
VIZ=1 python -c "from tinygrad.tensor import Tensor; Tensor([1,2,3,4,5]).realize()" & VIZ_PID=$!
echo "started VIZ server at: $(ps -p "$VIZ_PID" -o pid=)"
i=0; while ((i++ < 10)); do curl -sSf localhost:8000 > /dev/null && break || { echo "VIZ verification attempt $i/10"; sleep 1; }; done; ((i > 10)) && echo "Could not verify VIZ server" && exit 1
kill $VIZ_PID
pip install mypy
mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
- name: Run beautiful_mnist with tinygrad only
run: |
mkdir $GITHUB_WORKSPACE/test_dir
cd $GITHUB_WORKSPACE/test_dir
python -m venv venv
source venv/bin/activate
pip install $GITHUB_WORKSPACE
cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py .
PYTHONPATH=$GITHUB_WORKSPACE BS=2 STEPS=10 python beautiful_mnist.py
- name: Test Docs Build
run: python -m mkdocs build --strict
- name: Test Docs
run: |
python docs/abstractions2.py
python docs/abstractions3.py
- name: Test Quickstart
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py
- name: Test DEBUG
run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())"
- name: Compile EfficientNet to C and test it
run: |
CPU=1 PYTHONPATH="." python examples/compile_efficientnet.py > recognize.c
clang -O2 recognize.c -lm -o recognize
cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock
autogen:
name: Autogen
runs-on: ubuntu-22.04
timeout-minutes: 15
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
opencl: 'true'
amd: 'true'
cuda: 'true'
webgpu: 'true'
llvm: 'true'
- name: Verify OpenCL autogen
run: |
cp tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
./autogen_stubs.sh opencl
diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py
- name: Verify CUDA autogen
run: |
cp tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
cp tinygrad/runtime/autogen/nv_gpu.py /tmp/nv_gpu.py.bak
./autogen_stubs.sh cuda
./autogen_stubs.sh nv
diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
- name: Verify AMD autogen
run: |
cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
cp tinygrad/runtime/autogen/kfd.py /tmp/kfd.py.bak
cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
cp tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
cp tinygrad/runtime/autogen/sqtt.py /tmp/sqtt.py.bak
./autogen_stubs.sh hsa
./autogen_stubs.sh kfd
./autogen_stubs.sh comgr
./autogen_stubs.sh amd
./autogen_stubs.sh sqtt
diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
diff /tmp/kfd.py.bak tinygrad/runtime/autogen/kfd.py
diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
diff /tmp/amd_gpu.py.bak tinygrad/runtime/autogen/amd_gpu.py
diff /tmp/sqtt.py.bak tinygrad/runtime/autogen/sqtt.py
- name: Verify Linux autogen
run: |
cp tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
./autogen_stubs.sh libc
./autogen_stubs.sh io_uring
diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
- name: Verify WebGPU autogen
run: |
cp tinygrad/runtime/autogen/webgpu.py /tmp/webgpu.py.bak
./autogen_stubs.sh webgpu
diff /tmp/webgpu.py.bak tinygrad/runtime/autogen/webgpu.py
- name: Verify LLVM autogen
run: |
cp tinygrad/runtime/autogen/llvm.py /tmp/llvm.py.bak
./autogen_stubs.sh llvm
diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py
torchbackend:
name: Torch Backend Tests
runs-on: ubuntu-latest
timeout-minutes: 15
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: torch-backend-pillow-torchvision-et-pt
deps: testing_minimal
pydeps: "pillow torchvision expecttest"
llvm: 'true'
- name: Install ninja
run: |
sudo apt update || true
sudo apt install -y --no-install-recommends ninja-build
- name: Lint with ruff
run: |
pip3 install --upgrade --force-reinstall ruff==0.11.0
python3 -m ruff check extra/torch_backend/backend.py
- name: Test one op
run: PYTHONPATH=. FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_ops.py TestOps.test_add
- name: Test ResNet-18
run: PYTHONPATH=. DEBUG=2 python3 extra/torch_backend/example.py
- name: My (custom) tests
run: PYTHONPATH=. python3 extra/torch_backend/test.py
- name: Test one op in torch tests
run: PYTHONPATH=. DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32
- name: Test Ops with TINY_BACKEND
run: PYTHONPATH=. LLVM=1 LLVMOPT=0 TINY_BACKEND=1 python3 -m pytest -n auto test/test_ops.py --durations=20
- name: Test in-place operations on views
run: PYTHONPATH=. TORCH_DEBUG=1 python3 extra/torch_backend/test_inplace.py
- name: Test multi-gpu
run: PYTHONPATH=. LLVM=1 GPUS=4 TORCH_DEBUG=1 python3 extra/torch_backend/test_multigpu.py
torchbackendmore:
name: Torch Backend Tests More
runs-on: ubuntu-latest
timeout-minutes: 15
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: torch-backend-pillow-torchvision-et-pt
deps: testing_minimal
llvm: 'true'
- name: Install ninja
run: |
sudo apt update || true
sudo apt install -y --no-install-recommends ninja-build
- name: Test beautiful_mnist in torch with TINY_BACKEND
run: SPLIT_REDUCEOP=0 FUSE_ARANGE=1 PYTHONPATH=. LLVM=1 TARGET_EVAL_ACC_PCT=96.0 TINY_BACKEND=1 python3 examples/other_mnist/beautiful_mnist_torch.py
- name: Test some torch tests (expect failure)
run: PYTHONPATH=. python3 -m pytest extra/torch_backend/torch_tests.py -v --tb=no || true
tc:
name: Tensor Core tests
runs-on: ubuntu-latest
timeout-minutes: 10
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: uops-minimal
deps: testing_minimal
- name: Test IMAGE=2 support
run: |
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d
- name: Test emulated METAL tensor cores
run: |
DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm
PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
- name: Test emulated AMX tensor cores
run: PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
- name: Test emulated AMD tensor cores
run: |
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores_padded_amd TestLinearizer.test_tensor_cores_padded_uops
- name: Test emulated AMD MFMA tensor cores
run: |
PYTHONPATH=. DEBUG=2 EMULATE_AMD_MFMA=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD_MFMA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_AMD_MFMA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
- name: Test emulated AMD RDNA4 tensor cores
run: |
PYTHONPATH=. DEBUG=2 EMULATE_AMD_RDNA4=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD_RDNA4=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD_RDNA4=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD_RDNA4=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD_RDNA4=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_AMD_RDNA4=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
- name: Test emulated CUDA tensor cores
run: |
DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
DEBUG=2 EMULATE_CUDA_SM75=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
PYTHONPATH="." DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH="." DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
- name: Test emulated INTEL OpenCL tensor cores
run: DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py
- name: Full test tensor cores
run: |
PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
- name: Test tensor cores (TC=3)
run: |
PYTHONPATH=. DEBUG=2 PYTHON=1 EMULATE_METAL=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores_emulation
PYTHONPATH=. DEBUG=2 PYTHON=1 EMULATE_AMD=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores_emulation
PYTHONPATH=. DEBUG=2 PYTHON=1 EMULATE_AMD_MFMA=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores_emulation
PYTHONPATH=. DEBUG=2 PYTHON=1 EMULATE_CUDA=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores_emulation
PYTHONPATH=. DEBUG=2 PYTHON=1 EMULATE_INTEL=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores_emulation
PYTHONPATH=. DEBUG=2 PYTHON=1 EMULATE_AMX=1 AMX=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores_emulation
- name: Test device flop counts
run: |
PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul
bepython:
name: Python Backend
runs-on: ubuntu-latest
timeout-minutes: 10
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: be-minimal
deps: testing_minimal
- name: Test dtype with Python emulator
run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
- name: Test ops with Python emulator
run: DEBUG=2 PYTHON=1 python3 -m pytest -n=auto test/test_ops.py -k "not (test_split or test_simple_cumsum or test_cumsum or test_einsum or test_dot or test_dot_1d or test_big_gemm or test_broadcastdot or test_multidot or test_var_axis or test_std_axis or test_broadcast_full or test_broadcast_partial or test_simple_conv3d or test_dilated_conv_transpose2d or test_simple_conv_transpose3d or test_large_input_conv2d or test_max_pool2d or test_max_pool2d_simple or test_max_pool2d_bigger_stride or test_avg_pool2d or test_cat or test_scaled_product_attention or test_scaled_product_attention_causal or test_slice_fancy_indexing_dim_inject_none or test_slice_fancy_indexing_list_indices or test_slice_fancy_indexing_no_dim_collapse or test_slice_fancy_indexing_tuple_indices or test_slice_fancy_indexing_list_with_tensors or test_slice_fancy_indexing_dim_collapse_int or test_interpolate_bilinear or test_interpolate_bilinear_corners_aligned or test_scaled_dot_product_attention or test_cummax or test_simple_cummax or test_logcumsumexp or test_sort or test_cumprod)" --durations=20
- name: Test uops with Python emulator
run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20
- name: Test symbolic with Python emulator
run: PYTHONPATH=. PYTHON=1 python3 test/test_symbolic_ops.py
- name: test_linearizer_failures with Python emulator
run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1
- name: test_renderer_failures with Python emulator
run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_renderer_failures.py::TestRendererFailures
linter:
name: Linters
runs-on: ubuntu-latest
timeout-minutes: 10
# TODO: run the pre-commit hook to replace a lot of this
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: linting-only
python-version: '3.10'
deps: linting
- name: Lint bad-indentation and trailing-whitespace with pylint
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
- name: Lint with ruff
run: |
pip3 install --upgrade --force-reinstall ruff==0.11.0
python3 -m ruff check .
python3 -m ruff check examples/mlperf/model_train.py --ignore E501
- name: Lint tinygrad with pylint
run: python -m pylint tinygrad/
- name: Run mypy
run: python -m mypy --strict-equality --lineprecision-report . && cat lineprecision.txt
unittest:
name: Unit Tests
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: unittest-12
deps: testing_unit
- name: Test README
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
- name: Run unit tests
run: PYTHONPATH="." python -m pytest -n=auto test/unit/
- name: Run targetted tests on NULL backend
run: PYTHONPATH="." NULL=1 python3 test/test_multitensor.py TestMultiTensor.test_data_parallel_resnet_train_step
# TODO: support fake weights
#- name: Run LLaMA 7B on 4 fake devices
# run: NULL=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing
- name: Run GC tests
run: PYTHONPATH="." python test/external/external_uop_gc.py
- name: Repo line count < 13000 lines
run: MAX_LINE_COUNT=13000 python sz.py
fuzzing:
name: Fuzzing
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: fuzzing-minimal
deps: testing_minimal
- name: Fuzz Test symbolic
run: python test/external/fuzz_symbolic.py
- name: Fuzz Test fast idiv
run: python test/external/fuzz_fast_idiv.py
- name: Fuzz Test shapetracker
run: |
PYTHONPATH="." python test/external/fuzz_shapetracker.py
PYTHONPATH="." python test/external/fuzz_shapetracker_math.py
testgpuimage:
name: 'GPU IMAGE Tests'
runs-on: ubuntu-22.04
timeout-minutes: 10
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: gpu-image
deps: testing_minimal
opencl: 'true'
- name: Run Kernel Count Test
run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
- name: Test WINO=1
run: GPU=1 DEBUG=2 WINO=1 python3 test/test_ops.py TestOps.test_simple_conv2d
- name: Test GPU IMAGE=2 ops + training
run: |
PYTHONPATH="." GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20
PYTHONPATH="." GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist
- name: Run process replay tests
uses: ./.github/actions/process-replay
testopenpilot:
name: 'openpilot Compile Tests'
runs-on: ubuntu-22.04
timeout-minutes: 10
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: openpilot-compile
deps: testing
opencl: 'true'
- name: Test openpilot model kernel count and gate usage
run: |
PYTHONPATH="." ALLOWED_KERNEL_COUNT=209 ALLOWED_READ_IMAGE=2138 ALLOWED_GATED_READ_IMAGE=29 FLOAT16=0 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx
- name: Test openpilot alt model correctness (float32)
run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx
- name: Test openpilot fastvits model correctness (float32)
run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx
- name: Run process replay tests
uses: ./.github/actions/process-replay
testopencl:
name: 'ONNX+Optimization Tests'
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: onnxoptl
deps: testing,testing_tf
python-version: '3.11'
opencl: 'true'
llvm: 'true'
- name: Test ONNX (GPU)
run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (CPU)
run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (LLVM)
run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test Additional ONNX Ops (CPU)
run: CPU=1 PYTHONPATH=. python3 test/external/external_test_onnx_ops.py
- name: Test Quantize ONNX
run: CPU=1 PYTHONPATH=. python3 test/test_quantize_onnx.py
- name: Run REMOTE=1 Test
run: |
REMOTEDEV=CPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_jit.py
REMOTEDEV=GPU REMOTE=1 python3 -m pytest test/test_tiny.py test/test_image_dtype.py test/test_jit.py
REMOTEDEV=GPU IMAGE=2 REMOTE=1 python3 -m pytest test/test_tiny.py test/test_image_dtype.py
- name: Test Optimization Helpers
run: PYTHONPATH="." DEBUG=1 python3 extra/optimization/test_helpers.py
- name: Test Action Space
run: PYTHONPATH="." DEBUG=1 GPU=1 python3 extra/optimization/get_action_space.py
- name: Test Beam Search
run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
- name: Test MLPerf stuff
run: GPU=1 python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20
- name: Run handcode_opt
run: PYTHONPATH=. MODEL=resnet GPU=1 DEBUG=1 BS=4 HALF=0 python3 examples/handcode_opt.py
- name: Run process replay tests
uses: ./.github/actions/process-replay
testmodels:
name: Models (llvm+cpu+gpu)
runs-on: ubuntu-22.04
timeout-minutes: 10
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: models
deps: testing
opencl: 'true'
llvm: 'true'
- name: Test models (llvm)
run: LLVM=1 python -m pytest -n=auto test/models --durations=20
- name: Test models (gpu)
run: GPU=1 python -m pytest -n=auto test/models --durations=20
- name: Test models (cpu)
run: CPU=1 python -m pytest -n=auto test/models --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
testdsp:
name: Linux (DSP)
runs-on: ubuntu-24.04
timeout-minutes: 15
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: dsp-minimal
deps: testing_minimal
pydeps: "onnx==1.17.0 onnxruntime pillow"
llvm: "true"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build QEMU Docker with cache
uses: docker/build-push-action@v4
with:
file: extra/dsp/Dockerfile
push: false
load: true
tags: qemu-hexagon:latest
cache-from: type=gha
cache-to: type=gha,mode=min
- name: Run test_tiny on DSP
run: DEBUG=2 DSP=1 python test/test_tiny.py
- name: Test transcendentals
run: CC=clang-19 PYTHONPATH="." DEBUG=2 DSP=1 python test/test_transcendental.py TestTranscendentalVectorized
- name: Test quantize onnx
run: PYTHONPATH="." DEBUG=2 DSP=1 python3 test/test_quantize_onnx.py
- name: Test LLVM=1 DEVECTORIZE=0
run: LLVM=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
- name: Test LLVM=1 DEVECTORIZE=0 for model
run: PYTHONPATH="." LLVM=1 DEVECTORIZE=0 python3 test/models/test_efficientnet.py
- name: Test CPU=1 DEVECTORIZE=0
run: CPU=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
testwebgpu:
name: Linux (WebGPU)
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: webgpu-minimal
deps: testing_minimal
python-version: '3.11'
webgpu: 'true'
- name: Check Device.DEFAULT (WEBGPU) and print some source
run: |
WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run selected webgpu tests
run: |
WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit \
--ignore=test/test_copy_speed.py --ignore=test/test_rearrange_einops.py \
--ignore=test/test_fuzz_shape_ops.py --ignore=test/test_linearizer_failures.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
testamd:
strategy:
fail-fast: false
matrix:
backend: [amd, amdllvm]
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
IGNORE_OOB: 0
AMD: 1
MOCKGPU: 1
FORWARD_ONLY: 1
AMD_LLVM: ${{ matrix.backend == 'amdllvm' && '1' || matrix.backend != 'amdllvm' && '0' }}
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: ${{ matrix.backend }}-minimal
deps: testing_minimal
amd: 'true'
llvm: ${{ matrix.backend == 'amdllvm' && 'true' }}
- name: Check Device.DEFAULT and print some source
run: |
PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['AMD'], Device.DEFAULT"
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run LLVM test
if: matrix.backend=='amdllvm'
run: PYTHONPATH="." python test/test_amd_llvm.py
- name: Run pytest (amd)
run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/test_hcq.py test/external/external_test_am.py --durations=20
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
testnvidia:
strategy:
fail-fast: false
matrix:
backend: [ptx, nv]
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: ${{ matrix.backend }}-minimal
deps: testing_minimal
cuda: 'true'
- name: Set env
run: printf "${{ matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nPTX=1\nMOCKGPU=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run pytest (cuda)
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
tests:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu, gpu]
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: ${{ matrix.backend }}-minimal
deps: testing_minimal
opencl: ${{ matrix.backend == 'gpu' && 'true' }}
llvm: ${{ matrix.backend == 'llvm' && 'true' }}
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'gpu' && 'GPU=1' }}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CPU','GPU'], Device.DEFAULT"
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run pytest (not cuda)
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
# ****** OSX Tests ******
testmetal2:
name: MacOS (unit)
runs-on: macos-14
timeout-minutes: 20
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: metal2
deps: testing
python-version: '3.11'
amd: 'true'
cuda: 'true'
llvm: 'true'
- name: Run real world test
run: METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
- name: Test models (Metal)
run: METAL=1 python -m pytest test/models -v --durations=20
- name: Run ONNX
run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test tensor core ops (fake)
run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
- name: Test tensor core ops (real)
run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
- name: Test LLaMA compile speed
run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
- name: Test Beam Search
run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
- name: Fuzz Test linearizer
run: PYTHONPATH="." METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run pytest (amd)
env:
MOCKGPU: 1
AMD: 1
FORWARD_ONLY: 1
run: |
python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
- name: Run pytest (amd with llvm backend)
env:
MOCKGPU: 1
AMD: 1
AMD_LLVM: 1
FORWARD_ONLY: 1
run: |
python -m pytest -n=auto test/test_hcq.py test/test_tiny.py test/test_amd_llvm.py --durations=20
- name: Run pytest (ptx)
env:
MOCKGPU: 1
PTX: 1
NV: 1
FORWARD_ONLY: 1
run: |
python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
osxwebgpu:
name: MacOS (WebGPU)
runs-on: macos-14
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: osx-webgpu
webgpu: 'true'
- name: Build WEBGPU Efficientnet
run: WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Metal" python3 -m examples.compile_efficientnet
- name: Clean npm cache
run: npm cache clean --force
- name: Install Puppeteer
run: npm install puppeteer
- name: Run WEBGPU Efficientnet
run: node test/web/test_webgpu.js
osxremote:
name: MacOS (remote)
runs-on: macos-15
timeout-minutes: 10
env:
REMOTE: 1
REMOTEDEV: METAL
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: macos-remote
deps: testing_minimal
- name: Check Device.DEFAULT and print some source
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == 'REMOTE', Device.DEFAULT"
python -c "from tinygrad import Device; assert Device.default.properties['remotedev'] == 'METAL', Device.default.properties['remotedev']"
DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus
- name: Run REMOTE=1 Test
run: |
python3 -m pytest test/test_tiny.py test/test_jit.py
osxtests:
strategy:
fail-fast: false
matrix:
backend: [metal, llvm, cpu]
name: MacOS (${{ matrix.backend }})
runs-on: macos-15
timeout-minutes: 20
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: macos-${{ matrix.backend }}-minimal
deps: testing_minimal
llvm: ${{ matrix.backend == 'llvm' && 'true' }}
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1'}}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus
- name: Run pytest (${{ matrix.backend }})
run: python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
- name: Run macOS-specific unit test
if: matrix.backend == 'cpu'
run: python3 -m pytest test/unit/test_disk_tensor.py::TestDiskTensor::test_copy_to_cpu_not_truncated
# ****** Windows Tests ******
wintests:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu]
name: Windows (${{ matrix.backend }})
runs-on: windows-latest
timeout-minutes: 15
env:
IGNORE_OOB: 0
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: windows-minimal
deps: testing_unit
- name: Set env
shell: bash
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1'}}" >> $GITHUB_ENV
- name: Run unit tests
if: matrix.backend=='llvm'
run: python -m pytest -n=auto test/unit/ --ignore=test/unit/test_disk_tensor.py --ignore=test/unit/test_elf.py --ignore=test/unit/test_tar.py
- name: Run pytest (${{ matrix.backend }})
shell: bash
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20