cleanup tests, bump caches (#11746)

This commit is contained in:
George Hotz
2025-08-19 21:21:07 -07:00
committed by GitHub
parent 00391db628
commit 8af8808c61
10 changed files with 33 additions and 37 deletions

View File

@@ -121,7 +121,7 @@ runs:
echo 'Acquire::GzipIndexes "true";' | sudo tee /etc/apt/apt.conf.d/gzip
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' | sudo tee -a /etc/apt/apt.conf.d/99keep-debs
- name: Add OpenCL Repo
if: inputs.opencl == 'true' && runner.os == 'Linux'
shell: bash
@@ -174,7 +174,7 @@ runs:
if [[ "${{ inputs.llvm }}" == "true" ]]; then
pkgs+=" libllvm20 clang-20 lld-20"
fi
echo "pkgs=$pkgs" >> "$GITHUB_OUTPUT"
echo "hash=$(echo -n "$pkgs" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
@@ -183,21 +183,21 @@ runs:
uses: actions/cache@v4
with:
path: /var/cache/apt/archives/
key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }}
key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }}-${{ env.APT_CACHE_VERSION }}
- name: Run apt Update + Install
if: runner.os == 'Linux' && (inputs.opencl == 'true' || inputs.amd == 'true' || inputs.cuda == 'true' || inputs.webgpu == 'true' || inputs.llvm == 'true')
shell: bash
run: |
sudo apt -qq update || true
# ******** do install ********
if [[ -n "${{ steps.apt-pkgs.outputs.pkgs }}" ]]; then
sudo apt-get -y --allow-unauthenticated --no-install-recommends install ${{ steps.apt-pkgs.outputs.pkgs }}
fi
sudo chown -R $USER:$USER /var/cache/apt/archives/
# **** AMD ****
- name: Setup AMD (Linux)
if: inputs.amd == 'true' && runner.os == 'Linux'
@@ -234,7 +234,7 @@ runs:
cache-name: cache-gpuocelot-build
with:
path: ${{ github.workspace }}/gpuocelot/ocelot
key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-0
key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-${{ env.BUILD_CACHE_VERSION }}
- name: Clone/compile gpuocelot
if: inputs.ocelot == 'true' && steps.cache-build.outputs.cache-hit != 'true'
shell: bash

View File

@@ -63,7 +63,7 @@ jobs:
- name: Run model inference benchmark
run: METAL=1 python3.11 test/external/external_model_benchmark.py
- name: Test speed vs torch
run: BIG=2 MPS=1 python3.11 test/external/external_test_speed_v_torch.py | tee torch_speed.txt
run: BIG=2 MPS=1 python3.11 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test tensor cores
run: METAL=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
- name: Test AMX tensor cores
@@ -187,7 +187,7 @@ jobs:
- name: Run model inference benchmark
run: NV=1 CAPTURE_PROCESS_REPLAY=0 NOCLANG=1 python3 test/external/external_model_benchmark.py
- name: Test speed vs torch
run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt
run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test speed vs theoretical
run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
- name: Test benchmark allreduce
@@ -389,7 +389,7 @@ jobs:
#- name: Test speed vs torch
# run: |
# python3 -c "import torch; print(torch.__version__)"
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test speed vs theoretical
run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
- name: Test tensor cores

View File

@@ -1,8 +1,10 @@
name: Unit Tests
env:
# increment this when downloads substantially change to avoid the internet
DOWNLOAD_CACHE_VERSION: '11'
PYTHON_CACHE_VERSION: '2'
DOWNLOAD_CACHE_VERSION: '12'
PYTHON_CACHE_VERSION: '3'
APT_CACHE_VERSION: '1'
BUILD_CACHE_VERSION: '1'
CAPTURE_PROCESS_REPLAY: 1
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -30,9 +32,9 @@ jobs:
- name: External Benchmark Schedule
run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py
- name: Speed Test
run: LLVM=1 python3 test/external/external_test_speed_v_torch.py
run: LLVM=1 python3 test/speed/external_test_speed_v_torch.py
- name: Speed Test (BEAM=2)
run: BEAM=2 LLVM=1 python3 test/external/external_test_speed_v_torch.py
run: BEAM=2 LLVM=1 python3 test/speed/external_test_speed_v_torch.py
docs:
name: Docs

View File

@@ -2,7 +2,7 @@ import time
from tinygrad import Tensor, TinyJit, Device, Context
from tinygrad.helpers import Profiling, Timing, GlobalCounters
# python3 test/external/external_test_speed_v_torch.py TestSpeed.test_add_a
# python3 test/speed/external_test_speed_v_torch.py TestSpeed.test_add_a
@TinyJit
def plus(a:Tensor, b:Tensor): return a+b

View File

@@ -1,7 +1,10 @@
import unittest
import unittest, io
from contextlib import redirect_stdout
from tinygrad import Tensor, dtypes, Device
from tinygrad.helpers import OSX
from tinygrad.engine.realize import lower_schedule
from tinygrad.device import is_dtype_supported
from tinygrad.engine.realize import get_program
class TestCompileFailures(unittest.TestCase):
def compile(self, out:Tensor):
@@ -14,5 +17,17 @@ class TestCompileFailures(unittest.TestCase):
def test_add_max_uchar(self):
self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max())
class TestDisassembly(unittest.TestCase):
  """Checks that compiled kernels use native fp16 instructions by inspecting disassembly."""
  # TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic"
  @unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic")
  def test_float16_alu(self):
    # build a trivial float16 add, lower it to a program, and compile it
    c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
    s = c.schedule()[-1]
    p = get_program(s.ast, Device[Device.DEFAULT].renderer)
    lib = Device[Device.DEFAULT].compiler.compile(p.src)
    # disassemble prints to stdout; capture it so the output can be inspected
    out = io.StringIO()
    with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
    # "fcvt" would indicate the fp16 math was widened to fp32 instead of done natively
    assert "fcvt" not in out.getvalue()

if __name__ == '__main__':
  unittest.main()

View File

@@ -1,21 +0,0 @@
import unittest, io
from contextlib import redirect_stdout
from tinygrad import Tensor, dtypes
from tinygrad.device import Device
from tinygrad.helpers import OSX
from tinygrad.engine.realize import get_program

class TestDisassembly(unittest.TestCase):
  """Checks that compiled kernels use native fp16 instructions by inspecting disassembly."""
  # TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic"
  @unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic")
  def test_float16_alu(self):
    # build a trivial float16 add, lower it to a program, and compile it
    c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
    s = c.schedule()[-1]
    p = get_program(s.ast, Device[Device.DEFAULT].renderer)
    lib = Device[Device.DEFAULT].compiler.compile(p.src)
    # disassemble prints to stdout; capture it so the output can be inspected
    out = io.StringIO()
    with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
    # "fcvt" would indicate the fp16 math was widened to fp32 instead of done natively
    assert "fcvt" not in out.getvalue()

if __name__ == "__main__":
  unittest.main()