Use REAL_DEV for test skips (#10420)

This should fix the flakiness of the remote CPU tests: the segfaults were in
`test_data_parallel_resnet_train_step`, which is skipped on CPU but was not
being skipped on remote CPU.
This commit is contained in:
uuuvn
2025-05-20 05:32:14 +05:00
committed by GitHub
parent 9a199ccd81
commit ec9955c956
2 changed files with 9 additions and 8 deletions

View File

@@ -8,7 +8,7 @@ from tinygrad.engine.realize import lower_schedule, BufferCopy, CompiledRunner,
import numpy as np
from hypothesis import given, strategies as strat, settings
from tinygrad.device import is_dtype_supported
from test.helpers import not_support_multi_device
from test.helpers import REAL_DEV, not_support_multi_device
settings.register_profile("my_profile", max_examples=200, deadline=None, derandomize=getenv("DERANDOMIZE_CI", False))
settings.load_profile("my_profile")
@@ -358,8 +358,8 @@ class TestMultiTensor(unittest.TestCase):
np.testing.assert_allclose(y.numpy(), y_shard.numpy(), atol=1e-6, rtol=1e-6)
# NOTE: this is failing on LLVM CI, no idea why. Works locally.
@unittest.skipIf(CI and Device.DEFAULT in ("CUDA", "NV", "LLVM"), "slow")
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
@unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "LLVM"), "slow")
@unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
def test_data_parallel_resnet(self):
from extra.models.resnet import ResNet18
@@ -395,8 +395,8 @@ class TestMultiTensor(unittest.TestCase):
# sometimes there is zeros in these grads... why?
np.testing.assert_allclose(grad, shard_grad, atol=1e-5, rtol=1e-5)
@unittest.skipIf(CI and Device.DEFAULT in ("CUDA", "NV", "LLVM", "CPU"), "slow, and flaky on LLVM/CPU")
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
@unittest.skipIf(CI and REAL_DEV in ("CUDA", "NV", "LLVM", "CPU"), "slow, and flaky on LLVM/CPU")
@unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
def test_data_parallel_resnet_train_step(self):
from extra.models.resnet import ResNet18
fake_image = Tensor.rand((2, 3, 224//8, 224//8))
@@ -983,7 +983,7 @@ class TestShrinkMultiTensorShardedAxis(unittest.TestCase):
np.testing.assert_allclose(output.numpy(), expected)
@unittest.skipIf(not_support_multi_device(), "no multi")
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
@unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
class TestBatchNorm(unittest.TestCase):
def test_unsynced_backprop_conv_bn(self):
with Tensor.train():
@@ -1011,7 +1011,7 @@ class TestBatchNorm(unittest.TestCase):
optim.step()
out.numpy()
@unittest.skipIf(Device.DEFAULT == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
@unittest.skipIf(REAL_DEV == "WEBGPU" and not OSX, "WEBGPU Vulkan can only run kernels with up to 10 buffers")
def test_unsynced_backprop_standalone_bn(self):
from extra.lr_scheduler import OneCycleLR
GPUS = (d1, d2)

View File

@@ -2,6 +2,7 @@ import unittest
from tinygrad import Device, dtypes, Tensor
from tinygrad.device import Buffer
from tinygrad.helpers import Context
from test.helpers import REAL_DEV
@unittest.skipUnless(hasattr(Device[Device.DEFAULT].allocator, "_offset"), "subbuffer not supported")
class TestSubBuffer(unittest.TestCase):
@@ -39,7 +40,7 @@ class TestSubBuffer(unittest.TestCase):
out = (vt + 100).tolist()
assert out == [102, 103]
@unittest.skipIf(Device.DEFAULT not in {"CUDA", "NV", "AMD"}, "only NV, AMD, CUDA")
@unittest.skipIf(REAL_DEV not in {"CUDA", "NV", "AMD"}, "only NV, AMD, CUDA")
def test_subbuffer_transfer(self):
t = Tensor.arange(0, 10, dtype=dtypes.uint8).realize()
vt = t[2:5].contiguous().realize()