Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-08 22:48:25 -05:00)
Commit: cleanup tests, bump caches (#11746)
Files changed in this commit:
14 changes: .github/actions/setup-tinygrad/action.yml (vendored)
@@ -121,7 +121,7 @@ runs:
|
||||
echo 'Acquire::GzipIndexes "true";' | sudo tee /etc/apt/apt.conf.d/gzip
|
||||
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
|
||||
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' | sudo tee -a /etc/apt/apt.conf.d/99keep-debs
|
||||
|
||||
|
||||
- name: Add OpenCL Repo
|
||||
if: inputs.opencl == 'true' && runner.os == 'Linux'
|
||||
shell: bash
|
||||
@@ -174,7 +174,7 @@ runs:
|
||||
if [[ "${{ inputs.llvm }}" == "true" ]]; then
|
||||
pkgs+=" libllvm20 clang-20 lld-20"
|
||||
fi
|
||||
|
||||
|
||||
echo "pkgs=$pkgs" >> "$GITHUB_OUTPUT"
|
||||
echo "hash=$(echo -n "$pkgs" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
@@ -183,21 +183,21 @@ runs:
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: /var/cache/apt/archives/
|
||||
key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }}
|
||||
key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }}-${{ env.APT_CACHE_VERSION }}
|
||||
|
||||
- name: Run apt Update + Install
|
||||
if: runner.os == 'Linux' && (inputs.opencl == 'true' || inputs.amd == 'true' || inputs.cuda == 'true' || inputs.webgpu == 'true' || inputs.llvm == 'true')
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt -qq update || true
|
||||
|
||||
|
||||
# ******** do install ********
|
||||
if [[ -n "${{ steps.apt-pkgs.outputs.pkgs }}" ]]; then
|
||||
sudo apt-get -y --allow-unauthenticated --no-install-recommends install ${{ steps.apt-pkgs.outputs.pkgs }}
|
||||
fi
|
||||
|
||||
|
||||
sudo chown -R $USER:$USER /var/cache/apt/archives/
|
||||
|
||||
|
||||
# **** AMD ****
|
||||
- name: Setup AMD (Linux)
|
||||
if: inputs.amd == 'true' && runner.os == 'Linux'
|
||||
@@ -234,7 +234,7 @@ runs:
|
||||
cache-name: cache-gpuocelot-build
|
||||
with:
|
||||
path: ${{ github.workspace }}/gpuocelot/ocelot
|
||||
key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-0
|
||||
key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-${{ env.BUILD_CACHE_VERSION }}
|
||||
- name: Clone/compile gpuocelot
|
||||
if: inputs.ocelot == 'true' && steps.cache-build.outputs.cache-hit != 'true'
|
||||
shell: bash
|
||||
|
||||
6 changes: .github/workflows/benchmark.yml (vendored)
@@ -63,7 +63,7 @@ jobs:
|
||||
- name: Run model inference benchmark
|
||||
run: METAL=1 python3.11 test/external/external_model_benchmark.py
|
||||
- name: Test speed vs torch
|
||||
run: BIG=2 MPS=1 python3.11 test/external/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
run: BIG=2 MPS=1 python3.11 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
- name: Test tensor cores
|
||||
run: METAL=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
|
||||
- name: Test AMX tensor cores
|
||||
@@ -187,7 +187,7 @@ jobs:
|
||||
- name: Run model inference benchmark
|
||||
run: NV=1 CAPTURE_PROCESS_REPLAY=0 NOCLANG=1 python3 test/external/external_model_benchmark.py
|
||||
- name: Test speed vs torch
|
||||
run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
- name: Test speed vs theoretical
|
||||
run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
|
||||
- name: Test benchmark allreduce
|
||||
@@ -389,7 +389,7 @@ jobs:
|
||||
#- name: Test speed vs torch
|
||||
# run: |
|
||||
# python3 -c "import torch; print(torch.__version__)"
|
||||
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
|
||||
- name: Test speed vs theoretical
|
||||
run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
|
||||
- name: Test tensor cores
|
||||
|
||||
10 changes: .github/workflows/test.yml (vendored)
@@ -1,8 +1,10 @@
|
||||
name: Unit Tests
|
||||
env:
|
||||
# increment this when downloads substantially change to avoid the internet
|
||||
DOWNLOAD_CACHE_VERSION: '11'
|
||||
PYTHON_CACHE_VERSION: '2'
|
||||
DOWNLOAD_CACHE_VERSION: '12'
|
||||
PYTHON_CACHE_VERSION: '3'
|
||||
APT_CACHE_VERSION: '1'
|
||||
BUILD_CACHE_VERSION: '1'
|
||||
CAPTURE_PROCESS_REPLAY: 1
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -30,9 +32,9 @@ jobs:
|
||||
- name: External Benchmark Schedule
|
||||
run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py
|
||||
- name: Speed Test
|
||||
run: LLVM=1 python3 test/external/external_test_speed_v_torch.py
|
||||
run: LLVM=1 python3 test/speed/external_test_speed_v_torch.py
|
||||
- name: Speed Test (BEAM=2)
|
||||
run: BEAM=2 LLVM=1 python3 test/external/external_test_speed_v_torch.py
|
||||
run: BEAM=2 LLVM=1 python3 test/speed/external_test_speed_v_torch.py
|
||||
|
||||
docs:
|
||||
name: Docs
|
||||
|
||||
@@ -2,7 +2,7 @@ import time
|
||||
from tinygrad import Tensor, TinyJit, Device, Context
|
||||
from tinygrad.helpers import Profiling, Timing, GlobalCounters
|
||||
|
||||
# python3 test/external/external_test_speed_v_torch.py TestSpeed.test_add_a
|
||||
# python3 test/speed/external_test_speed_v_torch.py TestSpeed.test_add_a
|
||||
|
||||
@TinyJit
|
||||
def plus(a:Tensor, b:Tensor): return a+b
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import unittest
|
||||
import unittest, io
|
||||
from contextlib import redirect_stdout
|
||||
from tinygrad import Tensor, dtypes, Device
|
||||
from tinygrad.helpers import OSX
|
||||
from tinygrad.engine.realize import lower_schedule
|
||||
from tinygrad.device import is_dtype_supported
|
||||
from tinygrad.engine.realize import get_program
|
||||
|
||||
class TestCompileFailures(unittest.TestCase):
|
||||
def compile(self, out:Tensor):
|
||||
@@ -14,5 +17,17 @@ class TestCompileFailures(unittest.TestCase):
|
||||
def test_add_max_uchar(self):
|
||||
self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max())
|
||||
|
||||
class TestDisassembly(unittest.TestCase):
|
||||
# TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic"
|
||||
@unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic")
|
||||
def test_float16_alu(self):
|
||||
c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
|
||||
s = c.schedule()[-1]
|
||||
p = get_program(s.ast, Device[Device.DEFAULT].renderer)
|
||||
lib = Device[Device.DEFAULT].compiler.compile(p.src)
|
||||
out = io.StringIO()
|
||||
with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
|
||||
assert "fcvt" not in out.getvalue()
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
import unittest, io
|
||||
from tinygrad import Tensor, dtypes
|
||||
from contextlib import redirect_stdout
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.helpers import OSX
|
||||
from tinygrad.engine.realize import get_program
|
||||
|
||||
class TestDisassembly(unittest.TestCase):
|
||||
# TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic"
|
||||
@unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic")
|
||||
def test_float16_alu(self):
|
||||
c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
|
||||
s = c.schedule()[-1]
|
||||
p = get_program(s.ast, Device[Device.DEFAULT].renderer)
|
||||
lib = Device[Device.DEFAULT].compiler.compile(p.src)
|
||||
out = io.StringIO()
|
||||
with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
|
||||
assert "fcvt" not in out.getvalue()
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user