diff --git a/test/test_multitensor.py b/test/test_multitensor.py
--- a/test/test_multitensor.py
+++ b/test/test_multitensor.py
@@ -2,7 +2,7 @@ import unittest, functools, random
 from tinygrad import Tensor, Device, nn, GlobalCounters, TinyJit, dtypes, Variable
 from tinygrad.device import is_dtype_supported
 from tinygrad.uop.ops import Ops, UOp
-from tinygrad.helpers import CI, getenv, prod, Context, OSX
+from tinygrad.helpers import CI, getenv, prod, Context
 from tinygrad.nn.state import get_parameters, get_state_dict
 from tinygrad.engine.realize import lower_schedule, BufferCopy, CompiledRunner, run_schedule
 import numpy as np
@@ -374,7 +374,6 @@ class TestMultiTensor(unittest.TestCase):
 
   # NOTE: this is failing on LLVM CI, no idea why. Works locally.
   @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "LLVM", "CPU"), "slow, and flaky on LLVM/CPU")
-  @unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_data_parallel_resnet(self):
     from extra.models.resnet import ResNet18
 
@@ -411,7 +410,6 @@ class TestMultiTensor(unittest.TestCase):
     np.testing.assert_allclose(grad, shard_grad, atol=1e-5, rtol=1e-5)
 
   @unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "LLVM", "CPU"), "slow, and flaky on LLVM/CPU")
-  @unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_data_parallel_resnet_train_step(self):
     from extra.models.resnet import ResNet18
     fake_image = Tensor.rand((2, 3, 224//8, 224//8))
@@ -938,7 +936,6 @@ class TestShrinkMultiTensorShardedAxis(unittest.TestCase):
     np.testing.assert_allclose(output.numpy(), expected)
 
 @unittest.skipIf(not_support_multi_device(), "no multi")
-@unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
 class TestBatchNorm(unittest.TestCase):
   def test_unsynced_backprop_conv_bn(self):
     with Tensor.train():
@@ -966,7 +963,6 @@ class TestBatchNorm(unittest.TestCase):
       optim.step()
       out.numpy()
 
-  @unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_unsynced_backprop_standalone_bn(self):
     from extra.lr_scheduler import OneCycleLR
     GPUS = (d1, d2)
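
As context for the data-parallel tests above, a minimal sketch of the multi-device sharding they exercise (the device pair and tensor sizes here are illustrative; `Tensor.shard` and `Device.DEFAULT` are tinygrad's public API):

from tinygrad import Tensor, Device

# two logical instances of the default device (illustrative; real runs use GPUs)
GPUS = tuple(f"{Device.DEFAULT}:{i}" for i in range(2))

x = Tensor.rand(8, 16).shard(GPUS, axis=0)     # split the batch axis across devices
w = Tensor.rand(16, 4).shard(GPUS, axis=None)  # axis=None replicates the weights
out = (x @ w).numpy()                          # realizing gathers the sharded result
print(out.shape)  # (8, 4)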
diff --git a/test/test_nn.py b/test/test_nn.py
--- a/test/test_nn.py
+++ b/test/test_nn.py
@@ -4,7 +4,7 @@ import numpy as np
 import torch
 from tinygrad import Tensor, Device, TinyJit
 from tinygrad.uop.ops import Ops
-from tinygrad.helpers import GlobalCounters, CI, Context, OSX
+from tinygrad.helpers import GlobalCounters, CI, Context
 from tinygrad.nn import Conv1d, ConvTranspose1d, Conv2d, ConvTranspose2d, Linear, Embedding
 from tinygrad.nn import BatchNorm, LayerNorm, LayerNorm2d, GroupNorm, InstanceNorm, RMSNorm, LSTMCell
 from tinygrad.nn.state import load_state_dict
@@ -284,7 +284,6 @@ class TestNN(unittest.TestCase):
     torch_z = torch_layer(torch_x)
     np.testing.assert_allclose(z.numpy(), torch_z.detach().numpy(), atol=5e-4, rtol=1e-5)
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_groupnorm(self):
     BS, H, W, C, G = 20, 10, 10, 6, 3
 
@@ -311,7 +310,6 @@ class TestNN(unittest.TestCase):
     np.testing.assert_allclose(layer.weight.grad.numpy(), torch_layer.weight.grad.detach().numpy(), atol=5e-4, rtol=5e-4)
     np.testing.assert_allclose(layer.bias.grad.numpy(), torch_layer.bias.grad.detach().numpy(), atol=5e-4, rtol=5e-4)
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_layernorm(self):
     N, C, H, W = 20, 5, 10, 10
 
@@ -338,7 +336,6 @@ class TestNN(unittest.TestCase):
     np.testing.assert_allclose(layer.weight.grad.numpy(), torch_layer.weight.grad.detach().numpy(), atol=5e-4, rtol=5e-4)
     np.testing.assert_allclose(layer.bias.grad.numpy(), torch_layer.bias.grad.detach().numpy(), atol=5e-4, rtol=5e-4)
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_layernorm_2d(self):
     N, C, H, W = 20, 5, 10, 10
 
@@ -365,7 +362,6 @@ class TestNN(unittest.TestCase):
     np.testing.assert_allclose(layer.weight.grad.numpy(), torch_layer.weight.grad.detach().numpy(), atol=5e-4, rtol=5e-4)
     np.testing.assert_allclose(layer.bias.grad.numpy(), torch_layer.bias.grad.detach().numpy(), atol=5e-4, rtol=5e-4)
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_instancenorm_2d(self):
     N, C, H, W = 20, 10, 10, 10
 
@@ -392,7 +388,6 @@ class TestNN(unittest.TestCase):
     np.testing.assert_allclose(layer.weight.grad.numpy(), torch_layer.weight.grad.detach().numpy(), atol=1e-3, rtol=1e-3)
     np.testing.assert_allclose(layer.bias.grad.numpy(), torch_layer.bias.grad.detach().numpy(), atol=1e-3, rtol=1e-3)
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_instancenorm_3d(self):
     N, C, D, H, W = 20, 10, 10, 10, 10
 
@@ -419,7 +414,6 @@ class TestNN(unittest.TestCase):
     np.testing.assert_allclose(layer.weight.grad.numpy(), torch_layer.weight.grad.detach().numpy(), atol=2e-3, rtol=1e-3)
     np.testing.assert_allclose(layer.bias.grad.numpy(), torch_layer.bias.grad.detach().numpy(), atol=1e-3, rtol=1e-3)
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_rmsnorm(self):
     class TorchRMSNorm(torch.nn.Module):
       # https://github.com/meta-llama/llama/blob/be327c427cc5e89cc1d3ab3d3fec4484df771245/llama/model.py#L34C1-L77C36
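
For reference, the `TorchRMSNorm` baseline that `test_rmsnorm` checks against follows the llama implementation linked in the comment above; a trimmed sketch of that module (eps default and last-dim normalization per the cited source):

import torch

class TorchRMSNorm(torch.nn.Module):
  def __init__(self, dim: int, eps: float = 1e-6):
    super().__init__()
    self.eps = eps
    self.weight = torch.nn.Parameter(torch.ones(dim))

  def _norm(self, x):
    # scale by the reciprocal root-mean-square over the last dimension
    return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

  def forward(self, x):
    return self._norm(x.float()).type_as(x) * self.weight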
diff --git a/test/test_ops.py b/test/test_ops.py
--- a/test/test_ops.py
+++ b/test/test_ops.py
@@ -2,7 +2,7 @@ import time, math, unittest, functools, platform, warnings
 import numpy as np
 from typing import List, Callable
 import torch
-from tinygrad.helpers import getenv, IMAGE, DEBUG, CI, Context, TRANSCENDENTAL, OSX, AMD_LLVM
+from tinygrad.helpers import getenv, IMAGE, DEBUG, CI, Context, TRANSCENDENTAL, AMD_LLVM
 from tinygrad import Tensor, Device, dtypes
 from tinygrad.tensor import _to_np_dtype
 from tinygrad.device import is_dtype_supported
@@ -2682,7 +2682,6 @@ class TestOps(unittest.TestCase):
     i, j, k, o, p = [Tensor(tor.detach().cpu().numpy().astype(np.int32), requires_grad=False) for tor in [a,b,c,d,e]]
     return a,b,c,d,e,i,j,k,o,p
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU can only run kernels with up to 10 buffers")
   def test_slice_fancy_indexing_no_dim_collapse(self):
     a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
     # no dim collapse from int or dim injection from None
@@ -2734,7 +2733,6 @@ class TestOps(unittest.TestCase):
     helper_test_op([(2,3)], lambda x: x[torch.tensor([[0,1,-1],[-1,-2,0]]), torch.tensor([2,1,-1])],
                             lambda x: x[Tensor([[0,1,-1],[-1,-2,0]]), Tensor([2,1,-1])])
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU can only run kernels with up to 10 buffers")
   def test_slice_fancy_indexing_list_indices(self):
     a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
     helper_test_op([(2,5,6,5,3,4)], lambda x: x[[[0]]], lambda x: x[[[0]]])
@@ -2754,7 +2752,6 @@ class TestOps(unittest.TestCase):
     helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,((2,),(1,),(0,)),c,(2,1,0)], lambda x: x[i,((2,),(1,),(0,)),k,(2,1,0)])
     helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,(2,1,0),None,c,(2,1,0),e], lambda x: x[1,(2,1,0),None,k,(2,1,0),p])
 
-  @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
   def test_slice_fancy_indexing_list_with_tensors(self):
     a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
     helper_test_op([(2,5,6,5,3,4)], lambda x: x[[a]], lambda x: x[[i]])
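
The fancy-indexing tests above compare tinygrad against torch's advanced indexing; a small self-contained example of the behavior under test (array values are illustrative):

import numpy as np
from tinygrad import Tensor

x = Tensor(np.arange(6, dtype=np.float32).reshape(2, 3))  # [[0,1,2],[3,4,5]]
rows, cols = Tensor([0, 1, 1]), Tensor([2, 0, 1])
# tensor indices gather elementwise, like numpy/torch advanced indexing
print(x[rows, cols].numpy())  # [2. 3. 4.] == x[0,2], x[1,0], x[1,1]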
diff --git a/test/test_sample.py b/test/test_sample.py
--- a/test/test_sample.py
+++ b/test/test_sample.py
@@ -3,6 +3,7 @@ import numpy as np
 from tinygrad import Tensor, Variable, Device
 from tinygrad.helpers import OSX
 
+# TODO: still fails with MAX_KERNEL_BUFFERS
 @unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
 class TestSample(unittest.TestCase):
   def test_sample(self):
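
Sampling tests like `test_sample` draw rows via symbolic indices so one compiled kernel serves every draw; a minimal sketch of a `Variable`-plus-`shrink` pattern of that kind (sizes and names are illustrative, not the test's exact code):

from tinygrad import Tensor, Variable

X = Tensor.rand(100, 16).realize()
# a symbolic row index, bound to a concrete value at run time
v = Variable("idx", 0, X.shape[0] - 1).bind(37)
row = X.shrink(((v, v + 1), None))  # select one row without baking 37 into the kernel
print(row.numpy().shape)  # (1, 16)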