print test durations and add speed (#2107)

* print test durations

* decrease sizes to increase speed

* faster

* GPU/CLANG ONNX in separate runner

* test split, move ONNX CPU CI

* simpler tests

* simpler uops test

* faster

* less cuda apt

* running ninja install

* apt install

* split fancy indexing
Authored by George Hotz on 2023-10-18 13:46:42 -07:00, committed by GitHub
parent e2a1c2aaa6
commit 15da96f393
8 changed files with 123 additions and 112 deletions
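
The change repeated throughout the workflow hunks below is pytest's built-in --durations=N flag, which prints the N slowest tests after a run; -n=auto comes from the pytest-xdist plugin and fans the suite out across all CPU cores. A minimal equivalent invocation from Python, assuming pytest and pytest-xdist are installed:

    # report the 20 slowest tests, running the suite across all cores
    import pytest
    pytest.main(["-n=auto", "test/", "--durations=20"])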

View File

@@ -2,6 +2,8 @@ name: Unit Tests
 on:
   push:
     branches:
     - master
   pull_request:
   workflow_dispatch:
@@ -43,6 +45,22 @@ jobs:
       run: sudo apt install sloccount
     - name: Check <5000 lines
       run: sloccount tinygrad test examples extra; if [ $(sloccount tinygrad | sed -n 's/.*Total Physical Source Lines of Code (SLOC)[ ]*= \([^ ]*\).*/\1/p' | tr -d ',') -gt 5000 ]; then exit 1; fi
+    - name: Test Docs
+      run: python docs/abstractions.py
+    - name: Test Quickstart
+      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py
+    - name: Fuzz Test symbolic
+      run: python test/external/fuzz_symbolic.py
+    - name: Fuzz Test shapetracker
+      run: PYTHONPATH="." python test/external/fuzz_shapetracker.py
+    - name: Use as an external package
+      run: |
+        mkdir $HOME/test_external_dir
+        cd $HOME/test_external_dir
+        python -m venv venv
+        source venv/bin/activate
+        pip install $GITHUB_WORKSPACE
+        python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"

   testcpuimagenet:
     name: CPU and ImageNet to C Tests
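
The Test Quickstart step above strips the fenced python blocks out of docs/quickstart.md with awk and executes them. A rough Python equivalent of that one-liner, for readers who don't speak awk (illustrative only):

    # collect every ```python fenced block from the markdown file, as the awk
    # one-liner does, and write the concatenation to quickstart.py
    import re, pathlib
    md = pathlib.Path("docs/quickstart.md").read_text()
    blocks = re.findall(r"```python\n(.*?)```", md, flags=re.S)
    pathlib.Path("quickstart.py").write_text("\n".join(blocks))
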
@@ -63,32 +81,14 @@ jobs:
         key: testing-packages-${{ hashFiles('**/setup.py') }}
     - name: Install Dependencies
       run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
-    - name: Test Docs
-      run: python docs/abstractions.py
-    - name: Test Quickstart
-      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py
     - name: Run Pytest
-      run: python -m pytest -n=auto test/ -k "not (test_efficientnet and models/test_train.py)"
-    - name: Run ONNX
-      run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py
-    - name: Fuzz Test symbolic
-      run: python test/external/fuzz_symbolic.py
-    - name: Fuzz Test shapetracker
-      run: PYTHONPATH="." python test/external/fuzz_shapetracker.py
+      run: python -m pytest -n=auto test/ -k "not (test_efficientnet and models/test_train.py)" --durations=20
     - name: Compile EfficientNet to C
       run: PYTHONPATH="." CLANG=1 python examples/compile_efficientnet.py > recognize.c
     - name: Compile C to native
       run: clang -O2 recognize.c -lm -o recognize
     - name: Test EfficientNet
       run: curl https://media.istockphoto.com/photos/hen-picture-id831791190 | ./recognize | grep hen
-    - name: Use as an external package
-      run: |
-        mkdir $HOME/test_external_dir
-        cd $HOME/test_external_dir
-        python -m venv venv
-        source venv/bin/activate
-        pip install $GITHUB_WORKSPACE
-        python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"

   testtorch:
     name: Torch Tests
@@ -110,7 +110,7 @@ jobs:
     - name: Install Dependencies
       run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu
     - name: Run Pytest
-      run: TORCH=1 python -m pytest -n=auto test/
+      run: TORCH=1 python -m pytest -n=auto test/ --durations=20
     - name: Run ONNX
       run: TORCH=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py
@@ -118,8 +118,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        task: [optimage, openpilot]
-    name: ${{ matrix.task=='optimage'&&'GPU OPT and IMAGE Tests'|| matrix.task=='openpilot'&&'openpilot (OpenCL) Tests'}}
+        task: [optimage, openpilot, onnx]
+    name: ${{ matrix.task=='optimage'&&'GPU OPT and IMAGE Tests' || matrix.task=='openpilot'&&'openpilot (OpenCL) Tests' || matrix.task=='onnx'&&'ONNX Tests' }}
     runs-on: ubuntu-20.04
     timeout-minutes: 20
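
The matrix name expression above emulates a ternary with short-circuiting && and ||, the standard GitHub Actions workaround for the lack of a conditional operator. It behaves exactly like Python's and/or idiom:

    # the first truthy branch wins, mirroring the ${{ ... }} expression above
    def job_name(task: str) -> str:
      return (task == "optimage" and "GPU OPT and IMAGE Tests"
              or task == "openpilot" and "openpilot (OpenCL) Tests"
              or task == "onnx" and "ONNX Tests")

    assert job_name("onnx") == "ONNX Tests"
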
@@ -175,6 +175,15 @@ jobs:
       run: |
         PYTHONPATH="." python test/external/dist/test_world.py
         PYTHONPATH="." python test/external/dist/test_collectives.py
+    - if: ${{ matrix.task == 'onnx' }}
+      name: Test ONNX (CPU)
+      run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
+    - if: ${{ matrix.task == 'onnx' }}
+      name: Test ONNX (GPU)
+      run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
+    - if: ${{ matrix.task == 'onnx' }}
+      name: Test ONNX (CLANG)
+      run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20

   testmetalwebgpu:
     name: Metal and WebGPU Tests
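
The three new steps run the same ONNX backend suite three times, differing only in the CPU=1 / GPU=1 / CLANG=1 prefix that selects the tinygrad backend. A rough sketch of that selection mechanism (assumed here; the real logic lives in tinygrad's Device/getenv helpers):

    # assumed mechanism: the first backend env var set to 1 wins
    import os
    backends = ["GPU", "CUDA", "CLANG", "LLVM", "METAL", "WEBGPU", "TORCH", "CPU"]
    default = next((b for b in backends if os.getenv(b) == "1"), "CPU")
    print(f"ONNX tests would run on: {default}")
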
@@ -293,42 +302,35 @@ jobs:
         DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
     - name: Run pytest (not cuda)
       if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton'
-      run: python -m pytest -n=auto test/ -k '${{matrix.backend=='llvm'&&'not (test_nn.py and test_conv_transpose2d)'||'test'}}' -m 'not exclude_${{matrix.backend}}'
-    - name: Run ONNX (not cuda)
-      if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend!='clang'
-      run: python -m pytest -n=auto test/external/external_test_onnx_backend.py
+      run: python -m pytest -n=auto test/ -k '${{matrix.backend=='llvm'&&'not (test_nn.py and test_conv_transpose2d)'||'test'}}' -m 'not exclude_${{matrix.backend}}' --durations=20
+    - name: Run ONNX (only LLVM)
+      if: matrix.backend == 'llvm'
+      run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
     - name: Run pytest (cuda)
-      if: matrix.backend=='cuda'
-      run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors) and not (test_conv2d and test_tensor.py)' -m 'not exclude_cuda' --ignore=test/external --ignore=test/models
-    - name: Run pytest (ptx)
-      if: matrix.backend=='ptx'
-      run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors) and not (test_conv2d and test_tensor.py)' -m 'not exclude_cuda' --ignore=test/external --ignore=test/models
-    - name: Run pytest (triton)
-      if: matrix.backend=='triton'
-      run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors) and not (test_conv2d and test_tensor.py)' -m 'not exclude_cuda' --ignore=test/external --ignore=test/models
+      if: matrix.backend=='cuda'||matrix.backend=='ptx'||matrix.backend=='triton'
+      run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors) and not (test_conv2d and test_tensor.py)' -m 'not exclude_cuda' --ignore=test/external --ignore=test/models --durations=20

-  testunicorn:
-    name: ARM64 unicorn Test
-    runs-on: ubuntu-latest
-    if: ${{false}}
-    timeout-minutes: 20
-    steps:
-    - name: Checkout Code
-      uses: actions/checkout@v3
-    - name: Set up Python 3.11
-      uses: actions/setup-python@v4
-      with:
-        python-version: 3.11
-    - name: Cache python packages
-      uses: actions/cache@v3
-      with:
-        path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
-        key: testing-arm-packages-${{ hashFiles('**/setup.py') }}
-    - name: Install cross-assembler
-      run: |
-        sudo apt update -y
-        sudo apt install -y --no-install-recommends gcc-aarch64-linux-gnu
-    - name: Install dependencies
-      run: pip install -e '.[testing,arm]' --extra-index-url https://download.pytorch.org/whl/cpu
-    - name: Test arm
-      run: CI=1 ARM64=1 CLANG=1 python -m pytest -n=auto test/ -k 'not (test_nn.py and (test_conv_transpose2d or test_conv2d))' --ignore=test/models --ignore=test/test_speed_v_torch.py --ignore=test/test_net_speed.py --ignore=test/test_specific_conv.py --ignore=test/unit/test_disk_tensor.py
+#testunicorn:
+# name: ARM64 unicorn Test
+# runs-on: ubuntu-latest
+# timeout-minutes: 20
+# steps:
+# - name: Checkout Code
+# uses: actions/checkout@v3
+# - name: Set up Python 3.11
+# uses: actions/setup-python@v4
+# with:
+# python-version: 3.11
+# - name: Cache python packages
+# uses: actions/cache@v3
+# with:
+# path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages
+# key: testing-arm-packages-${{ hashFiles('**/setup.py') }}
+# - name: Install cross-assembler
+# run: |
+# sudo apt update -y
+# sudo apt install -y --no-install-recommends gcc-aarch64-linux-gnu
+# - name: Install dependencies
+# run: pip install -e '.[testing,arm]' --extra-index-url https://download.pytorch.org/whl/cpu
+# - name: Test arm
+# run: CI=1 ARM64=1 CLANG=1 python -m pytest -n=auto test/ -k 'not (test_nn.py and (test_conv_transpose2d or test_conv2d))' --ignore=test/models --ignore=test/test_speed_v_torch.py --ignore=test/test_net_speed.py --ignore=test/test_specific_conv.py --ignore=test/unit/test_disk_tensor.py
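
The -k and -m options used above are pytest's two selection mechanisms: -k filters by test name with a boolean expression, while -m filters by @pytest.mark markers such as the custom exclude_* marks. In isolation:

    # pytest -k 'not half' -m 'not exclude_cuda' would keep only test_add here:
    # test_conv2d_half is dropped by name, test_matmul by its marker
    import pytest

    @pytest.mark.exclude_cuda   # custom mark; register it in pytest config to silence warnings
    def test_matmul(): pass

    def test_conv2d_half(): pass

    def test_add(): pass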

View File

@@ -68,7 +68,7 @@ class TestEfficientNet(unittest.TestCase):
     self.assertEqual(label, "hen")

   def test_chicken_bigbatch(self):
-    label = _infer(self.model, chicken_img, 4)
+    label = _infer(self.model, chicken_img, 2)
     self.assertEqual(label, "hen")

   def test_car(self):

View File

@@ -72,7 +72,7 @@ class TestRealWorld(unittest.TestCase):
     # NOTE: only test one pass, not testing the dynamic shape autoregressive part
     helper_test("test_llama", lambda: (Tensor([[1,]]),), test, 0.22 if CI else 13.5, 126 if CI else 486, all_jitted=True)

-  @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and Device.DEFAULT not in ["LLVM"], "needs JIT, too long on CI LLVM")
+  @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and (Device.DEFAULT not in ["LLVM"] or not CI), "needs JIT, too long on CI LLVM")
   def test_gpt2(self):
     Tensor.default_type = dtypes.float16
@@ -83,7 +83,7 @@ class TestRealWorld(unittest.TestCase):
     def test(t): return model(t, 0).realize()
     helper_test("test_gpt2", lambda: (Tensor([[1,]]),), test, 0.21 if CI else 0.9, 129 if CI else 369, all_jitted=True)

-  @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and Device.DEFAULT not in ["LLVM"], "needs JIT, too long on CI LLVM")
+  @unittest.skipUnless(Device.DEFAULT in JIT_SUPPORTED_DEVICE and (Device.DEFAULT not in ["LLVM", "CLANG"] or not CI), "needs JIT, too long on CI LLVM and CLANG")
   def test_train_cifar(self):
     # TODO: with default device
     #old_default = Device.DEFAULT
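
Both reworked skipUnless conditions follow the same pattern: the slow backends are skipped only when CI is set, so the tests still run locally. The shape of the check, with stand-ins for tinygrad's CI flag and Device.DEFAULT:

    import unittest
    CI, DEVICE = True, "CLANG"   # stand-ins; the real values come from tinygrad.helpers and Device

    class Sketch(unittest.TestCase):
      @unittest.skipUnless(DEVICE not in ["LLVM", "CLANG"] or not CI, "too long on CI LLVM and CLANG")
      def test_train_cifar(self): pass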

View File

@@ -13,7 +13,7 @@ from models.vit import ViT
 from models.resnet import ResNet18
 import pytest

-pytestmark = pytest.mark.exclude_gpu
+pytestmark = [pytest.mark.exclude_gpu, pytest.mark.exclude_clang]

 BS = getenv("BS", 2)
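
A module-level pytestmark applies its marks to every test in the file, and it accepts either a single mark or a list, which is what lets this change tack exclude_clang onto the existing exclude_gpu; the workflow then deselects the whole file on those backends with -m 'not exclude_gpu' / -m 'not exclude_clang':

    import pytest
    # every test in this module now carries both marks
    pytestmark = [pytest.mark.exclude_gpu, pytest.mark.exclude_clang]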

View File

@@ -2,7 +2,7 @@
 import unittest
 import numpy as np
 from extra.utils import WINDOWS
-from tinygrad.helpers import getenv
+from tinygrad.helpers import CI
 from tinygrad.jit import TinyJit
 from tinygrad.tensor import Tensor, Device
 from tinygrad.nn import BatchNorm2d, Conv1d, ConvTranspose1d, Conv2d, ConvTranspose2d, Linear, GroupNorm, LayerNorm, LayerNorm2d, Embedding, InstanceNorm
@@ -90,7 +90,7 @@ class TestNN(unittest.TestCase):
     _test_linear(Tensor.randn(BS, T, in_dim)) # test with more dims

   def test_conv1d(self):
-    BS, C1, W = 4, 16, 224
+    BS, C1, W = 4, 16, 224//4
     C2, K, S, P = 64, 7, 2, 1

     # create in tinygrad
@@ -110,7 +110,7 @@ class TestNN(unittest.TestCase):
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)

   def test_conv2d(self):
-    BS, C1, H, W = 4, 16, 224, 224
+    BS, C1, H, W = 4, 16, 224//4, 224//4
     C2, K, S, P = 64, 7, 2, 1

     # create in tinygrad
@@ -166,9 +166,9 @@ class TestNN(unittest.TestCase):
     Tensor.wino = False

-  @unittest.skipIf(getenv("CI", "") != "" and (WINDOWS or Device.DEFAULT == "WEBGPU"), "runs out of memory in CI")
+  @unittest.skipIf(CI and (WINDOWS or Device.DEFAULT == "WEBGPU"), "runs out of memory in CI")
   def test_conv_transpose1d(self):
-    BS, C1, W = 4, 16, 224
+    BS, C1, W = 4, 16, 224//4
     C2, K, S, P = 64, 7, 2, 1

     # create in tinygrad
@@ -187,9 +187,9 @@ class TestNN(unittest.TestCase):
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)

-  @unittest.skipIf(getenv("CI", "") != "" and (WINDOWS or Device.DEFAULT == "WEBGPU"), "runs out of memory in CI")
+  @unittest.skipIf(CI and (WINDOWS or Device.DEFAULT == "WEBGPU"), "runs out of memory in CI")
   def test_conv_transpose2d(self):
-    BS, C1, H, W = 4, 16, 224, 224
+    BS, C1, H, W = 4, 16, 224//4, 224//4
     C2, K, S, P = 64, 7, 2, 1

     # create in tinygrad
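
These test_nn.py hunks replace the repeated getenv("CI", "") != "" checks with a CI constant imported from tinygrad.helpers. Its definition is not part of this diff; presumably it is just the same check evaluated once at import time, along the lines of:

    # assumed definition of tinygrad.helpers.CI: truthy when the CI env var is set
    import os
    CI = os.getenv("CI", "") != ""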

View File

@@ -847,8 +847,8 @@ class TestOps(unittest.TestCase):
   def test_conv1d(self):
     for bs in [1,8]:
       for cin in [1,3]:
-        for groups in [1,3] if cin == 3 else [1]:
-          for H in [1,2,5]:
+        for H in [1,2,5]:
+          for groups in [1,3] if cin == 3 and H == 5 else [1]:
             with self.subTest(batch_size=bs, channels=cin, groups=groups, height=H):
               helper_test_op([(bs,cin,11), (6,cin//groups,H)],
                 lambda x,w: torch.nn.functional.conv1d(x,w,groups=groups).relu(),
@@ -886,13 +886,13 @@ class TestOps(unittest.TestCase):
                 lambda x,w: Tensor.conv2d(x,w,padding=p).relu(), atol=1e-4)

   def test_conv2d(self):
-    for bs in [1,8]:
+    for bs in [1,4]:
       for cin in [1,3]:
-        for groups in [1,3] if cin == 3 else [1]:
-          for H in [1,2,5]:
-            for W in [1,2,3,5]:
+        for H in [1,2,3]:
+          for W in [1,2,3,5]:
+            for groups in [1,3] if cin == 3 and H == 3 and W == 3 else [1]:
               with self.subTest(batch_size=bs, channels=cin, groups=groups, height=H, width=W):
-                helper_test_op([(bs,cin,11,28), (6,cin//groups,H,W)],
+                helper_test_op([(bs,cin,11,7), (6,cin//groups,H,W)],
                   lambda x,w: torch.nn.functional.conv2d(x,w,groups=groups).relu(),
                   lambda x,w: Tensor.conv2d(x,w,groups=groups).relu(), atol=1e-4, grad_rtol=1e-5)
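
Moving the groups loop innermost so that groups=3 only runs at the largest kernel size, together with bs [1,8] -> [1,4] and input width 28 -> 7, trims the conv2d sweep from 72 subtests to 50 while keeping the grouped path covered. Worked out:

    # subtest counts for the loops above, before and after the reorder
    before = sum(1 for bs in [1,8] for cin in [1,3]
                   for groups in ([1,3] if cin == 3 else [1])
                   for H in [1,2,5] for W in [1,2,3,5])
    after = sum(1 for bs in [1,4] for cin in [1,3]
                  for H in [1,2,3] for W in [1,2,3,5]
                  for groups in ([1,3] if cin == 3 and H == 3 and W == 3 else [1]))
    print(before, after)  # 72 50
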
@@ -1094,7 +1094,7 @@ class TestOps(unittest.TestCase):
   def test_cat(self):
     for dim in range(-2, 3):
-      helper_test_op([(45,65, 90), (45,65,90), (45,65,90)], lambda x,y,z: torch.cat((x,y,z), dim), lambda x,y,z: x.cat(y, z, dim=dim))
+      helper_test_op([(45,65,9), (45,65,9), (45,65,9)], lambda x,y,z: torch.cat((x,y,z), dim), lambda x,y,z: x.cat(y, z, dim=dim))

     with self.assertRaises(AssertionError):
       a = Tensor(3.14)
@@ -1117,12 +1117,12 @@ class TestOps(unittest.TestCase):
     np.testing.assert_allclose(Tensor.stack([a, a]).numpy(), Tensor([3.14, 3.14]).numpy())

   def test_repeat(self):
-    x = Tensor.randn(45, 65, 3)
+    x = Tensor.randn(4, 6, 3)
     base_repeats = [2, 4, 3]
     for reps in [[], [4], [2, 1], [3, 2, 2]]:
       repeats = base_repeats + reps
-      helper_test_op([(45, 65, 3)], lambda x: x.repeat(*repeats), lambda x: x.repeat(repeats))
+      helper_test_op([(4, 6, 3)], lambda x: x.repeat(*repeats), lambda x: x.repeat(repeats))
       helper_test_op([()], lambda x: x.repeat(*repeats), lambda x: x.repeat(repeats))

     with self.assertRaises(AssertionError):
@@ -1157,7 +1157,7 @@ class TestOps(unittest.TestCase):
     n = (x < 0).where(x, 1).numpy()
     assert np.all(n == 1.)

-  def test_slice_fancy_indexing(self):
+  def _get_index_randoms(self):
     # indices cannot have gradient
     # TODO currently does not support IndexError for out of bounds idx values
     a = torch.randint(low=-1, high=1, size=(2,1,1,1,1,1), dtype=torch.int64, requires_grad=False)
@@ -1166,34 +1166,43 @@ class TestOps(unittest.TestCase):
     d = torch.randint(high=4, size=(2,1,1,5,1,1), dtype=torch.int64, requires_grad=False)
     e = torch.randint(high=1, size=(1,1,1,1,6,1), dtype=torch.int64, requires_grad=False)
     i, j, k, o, p = [Tensor(tor.detach().numpy().astype(np.int32), dtype=dtypes.int32, requires_grad=False) for tor in [a,b,c,d,e]]
+    return a,b,c,d,e,i,j,k,o,p
+
+  def test_slice_fancy_indexing_no_dim_collapse(self):
+    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
     # no dim collapse from int or dim injection from None
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,b,c,d,e], lambda x: x[i,j,k,o,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[:,b,c,d,e], lambda x: x[:,j,k,o,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[:,b,c,d,:], lambda x: x[:,j,k,o,:])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,b,...], lambda x: x[i,j,...])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,...,e], lambda x: x[i,...,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[...,c,:,e], lambda x: x[...,k,:,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,d,e], lambda x: x[i,j,k,o,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[:,b,c,d,:], lambda x: x[:,j,k,o,:])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,...], lambda x: x[i,j,...])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,...,e], lambda x: x[i,...,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[...,c,:,e], lambda x: x[...,k,:,p])
+
+  def test_slice_fancy_indexing_dim_collapse_int(self):
+    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
     # dim collapse from int
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[1,b,c,d,e], lambda x: x[1,j,k,o,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,b,c,d,2], lambda x: x[i,j,k,o,2])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,b,3,d,e], lambda x: x[i,j,3,o,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[1,b,c,d,2], lambda x: x[1,j,k,o,2])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[1,b,2,d,2], lambda x: x[1,j,2,o,2])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,2,2,2,e], lambda x: x[i,2,2,2,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[1,:,3:11:2,d,0:2], lambda x: x[1,:,3:11:2,o,0:2])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,b,c,d,e], lambda x: x[1,j,k,o,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,3,d,e], lambda x: x[i,j,3,o,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,b,2,d,2], lambda x: x[1,j,2,o,2])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,2,2,2,e], lambda x: x[i,2,2,2,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,:,3:11:2,d,0:2], lambda x: x[1,:,3:11:2,o,0:2])
+
+  def test_slice_fancy_indexing_dim_inject_none(self):
+    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
     # dim injection from None
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[None,b,c,d,e], lambda x: x[None,j,k,o,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,b,c,d,None], lambda x: x[i,j,k,o,None])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,b,None,d,e], lambda x: x[i,j,None,o,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,None,None,None,e], lambda x: x[i,None,None,None,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[None,b,None,d,None], lambda x: x[None,j,None,o,None])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[None,b,c,d,None], lambda x: x[None,j,k,o,None])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[a,:,None,d,e], lambda x: x[i,:,None,o,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,b,c,d,e], lambda x: x[None,j,k,o,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,d,None], lambda x: x[i,j,k,o,None])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,None,d,e], lambda x: x[i,j,None,o,p])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,b,c,d,None], lambda x: x[None,j,k,o,None])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,:,None,d,e], lambda x: x[i,:,None,o,p])
+
+  def test_slice_fancy_indexing_dim_inject_and_collapse(self):
+    a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
     # dim injection and collapse
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[1,b,None,d,1], lambda x: x[1,j,None,o,1])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[None,b,2,d,None], lambda x: x[None,j,2,o,None])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[None,1,None,d,e], lambda x: x[None,1,None,o,p])
-    helper_test_op([(2,5,15,5,3,4)], lambda x: x[...,1,d,None], lambda x: x[...,1,o,None])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,b,None,d,1], lambda x: x[1,j,None,o,1])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[None,b,2,d,None], lambda x: x[None,j,2,o,None])
+    helper_test_op([(2,5,6,5,3,4)], lambda x: x[...,1,d,None], lambda x: x[...,1,o,None])

   def test_slice_fancy_indexing_with_idx(self):
     # indexing using idx with different dim
     helper_test_op([(2,3)], lambda x: x[torch.tensor([[0,0,0],[0,0,0]]), torch.tensor(1)], lambda x: x[Tensor([[0,0,0],[0,0,0]]), Tensor(1)])
     helper_test_op([(2,3)], lambda x: x[torch.tensor([1]), torch.tensor([[0,0,0],[0,0,0]])], lambda x: x[Tensor([1]), Tensor([[0,0,0],[0,0,0]])])
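
The new method names track standard NumPy/PyTorch advanced-indexing semantics: an integer index collapses its dimension, None injects a new one, and index tensors broadcast. A quick reference in plain NumPy:

    import numpy as np
    x = np.zeros((2,5,6,5,3,4))
    idx = np.zeros((2,1,1,1,1,1), dtype=np.int64)   # shaped like index tensor a above
    print(x[1].shape)      # (5, 6, 5, 3, 4): int collapses the first dim
    print(x[None].shape)   # (1, 2, 5, 6, 5, 3, 4): None injects a dim
    print(x[idx].shape)    # (2, 1, 1, 1, 1, 1, 5, 6, 5, 3, 4): fancy index broadcasts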

View File

@@ -44,18 +44,18 @@ class TestUOps(unittest.TestCase):
   def _test_uop_fxn(self, bop, fxn, dt=dtypes.float32):
     for f in [_test_single_value, _test_single_value_const]:
-      for a in [-2.0, 0.0, 1.0, 2.0]:
+      for a in [-2.0, 0.0, 1.0]:
         self._equal(f([a], bop, dt), fxn(a))

   def _test_bop_fxn(self, bop, fxn, dt=dtypes.float32, no_b_zero=False):
     for f in [_test_single_value, _test_single_value_const]:
-      for a in [-2.0, 0.0, 1.0, 2.0]:
-        for b in [-3.0, 1.0, 3.0] + ([] if no_b_zero else [0.0]):
+      for a in [-2.0, 0.0, 1.0]:
+        for b in [-3.0, 1.0] + ([] if no_b_zero else [0.0]):
           self._equal(f([a,b], bop, dt), fxn(a,b))

   def _test_top_fxn(self, bop, fxn, dt=dtypes.float32):
     for f in [_test_single_value, _test_single_value_const]:
-      for a in [-2.0, 0, 1, 2.0]:
+      for a in [-2.0, 0, 1]:
         for b in [-3.0, 3.0]:
           for c in [-4.0, 4.0]:
             self._equal(f([a,b,c], bop, dt), fxn(a,b,c))
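
Since each helper runs every value combination under two wrappers, dropping one value per operand list roughly halves the binary-op cases:

    # cases per op in _test_bop_fxn (with the zero b included): wrappers x |a| x |b|
    before = 2 * 4 * 4   # 32
    after  = 2 * 3 * 3   # 18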

View File

@@ -1,5 +1,5 @@
 import unittest
-from tinygrad.helpers import Timing
+from tinygrad.helpers import Timing, CI
 from tinygrad.tensor import Tensor
 from tinygrad.ops import LoadOps
 from tinygrad.codegen.linearizer import Linearizer
@@ -31,9 +31,9 @@ class TestWinograd(unittest.TestCase):
   def test_profile(self):
     x,w = Tensor.rand(1,4,9,9).realize(), Tensor.rand(4,4,3,3).realize()
-    pr = start_profile()
+    if not CI: pr = start_profile()
     out = Tensor.conv2d(x,w).realize()
-    stop_profile(pr, sort='time')
+    if not CI: stop_profile(pr, sort='time')
     out.numpy()

 if __name__ == '__main__':
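
start_profile/stop_profile are the repo's own profiling helpers (defined elsewhere, not in this diff); gating them behind `if not CI` drops the profiler overhead in CI while still exercising the winograd conv2d. A generic version of the same guard with the standard library, under the assumption those helpers wrap cProfile:

    # profile locally, skip the profiler entirely under CI
    import cProfile, pstats, os
    CI = os.getenv("CI", "") != ""
    pr = cProfile.Profile()
    if not CI: pr.enable()
    result = sum(i * i for i in range(10**6))   # stand-in for the conv2d under test
    if not CI:
      pr.disable()
      pstats.Stats(pr).sort_stats("time").print_stats(5)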