clean up skip slow tests in PYTHON (#12028)

Skip slow tests via the SKIP_SLOW_TEST environment variable and a `slow_test` decorator.
This commit is contained in:
chenyu
2025-09-05 11:35:26 -04:00
committed by GitHub
parent f08299d2ec
commit ce7163e9b4
2 changed files with 48 additions and 1 deletions

View File

@@ -17,6 +17,9 @@ if CI:
# Module-level switches, each read through getenv with a default of 0.
# NOTE(review): the code that consumes these flags is outside this hunk — confirm their exact effect there.
FORWARD_ONLY = getenv("FORWARD_ONLY", 0)
PRINT_TENSORS = getenv("PRINT_TENSORS", 0)
def slow_test(test_func):
  """Decorator marking `test_func` as slow.

  The wrapped test is skipped (reason "Skipping slow test") when
  getenv("SKIP_SLOW_TEST") is truthy; the variable is read at the moment
  the decorator is applied.
  """
  skip_when_requested = unittest.skipIf(getenv("SKIP_SLOW_TEST"), "Skipping slow test")
  return skip_when_requested(test_func)
def helper_test_op(shps, torch_fxn, tinygrad_fxn=None, atol=1e-6, rtol=1e-3, grad_atol=1e-4, grad_rtol=1e-3,
forward_only=False, vals=None, low=-2, high=2):
if tinygrad_fxn is None: tinygrad_fxn = torch_fxn
@@ -1009,9 +1012,11 @@ class TestOps(unittest.TestCase):
def test_small_cumsum(self):
  # shape entry 10: torch.cumsum over dim 0 against Tensor.cumsum over axis 0
  torch_cumsum = lambda x: torch.cumsum(x, dim=0)
  tiny_cumsum = lambda x: Tensor.cumsum(x, axis=0)
  helper_test_op([(10)], torch_cumsum, tiny_cumsum)
@slow_test
def test_simple_cumsum(self):
  # compare torch.cumsum (dim 0) with Tensor.cumsum (axis 0) for shape entries 512 and 1022, in that order
  for size in (512, 1022):
    helper_test_op([size], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
@slow_test
def test_cumsum(self):
helper_test_op([()], lambda x: torch.cumsum(x, dim=0), lambda x: Tensor.cumsum(x, axis=0))
self.helper_test_exception([()], lambda x: torch.cumsum(x, dim=1), lambda x: Tensor.cumsum(x, axis=1), expected=IndexError)
@@ -1029,9 +1034,11 @@ class TestOps(unittest.TestCase):
def test_small_cumprod(self):
  # shape entry 10: torch.cumprod over dim 0 against Tensor.cumprod over axis 0
  torch_cumprod = lambda x: torch.cumprod(x, dim=0)
  tiny_cumprod = lambda x: Tensor.cumprod(x, axis=0)
  helper_test_op([(10)], torch_cumprod, tiny_cumprod)
@slow_test
def test_simple_cumprod(self):
  # compare torch.cumprod (dim 0) with Tensor.cumprod (axis 0) for shape entries 512 and 1022, in that order
  for size in (512, 1022):
    helper_test_op([size], lambda x: torch.cumprod(x, dim=0), lambda x: Tensor.cumprod(x, axis=0))
@slow_test
def test_cumprod(self):
helper_test_op([()],lambda x: torch.cumprod(x, dim=0),lambda x: Tensor.cumprod(x, axis=0))
self.helper_test_exception([()],lambda x: torch.cumprod(x, dim=1),lambda x: Tensor.cumprod(x, axis=1),expected=IndexError)
@@ -1049,9 +1056,11 @@ class TestOps(unittest.TestCase):
def test_small_cummax(self):
  # shape entry 10: the .values of torch.cummax over dim 0 against Tensor.cummax over axis 0
  torch_cummax = lambda x: torch.cummax(x, dim=0).values
  tiny_cummax = lambda x: Tensor.cummax(x, axis=0)
  helper_test_op([(10)], torch_cummax, tiny_cummax)
@slow_test
def test_simple_cummax(self):
  # compare torch.cummax(...).values (dim 0) with Tensor.cummax (axis 0) for shape entries 512 and 1022, in that order
  for size in (512, 1022):
    helper_test_op([size], lambda x: torch.cummax(x, dim=0).values, lambda x: Tensor.cummax(x, axis=0))
@slow_test
def test_cummax(self):
helper_test_op([()], lambda x: torch.cummax(x, dim=0).values, lambda x: Tensor.cummax(x, axis=0))
# TODO: torch allows this?
@@ -1148,6 +1157,7 @@ class TestOps(unittest.TestCase):
np.testing.assert_equal(indices.numpy(), [2, 4, 6])
self.helper_test_exception([(4)], lambda x: x.topk(5), expected=(RuntimeError, ValueError))
@slow_test
def test_einsum(self):
# matrix transpose
helper_test_op([(10,10)], lambda a: torch.einsum('ij->ji', a), lambda a: Tensor.einsum('ij->ji', a))
@@ -1195,6 +1205,7 @@ class TestOps(unittest.TestCase):
# bilinear transformation
helper_test_op([(2,3),(5,3,7),(2,7)], lambda a,b,c: torch.einsum('ik,jkl,il->ij', [a,b,c]), lambda a,b,c: Tensor.einsum('ik,jkl,il->ij', [a,b,c]))
@slow_test
def test_einsum_ellipsis(self):
"""The expected behavior for einsum is described in the PyTorch docs: https://pytorch.org/docs/stable/generated/torch.einsum.html"""
# test ellipsis
@@ -1238,6 +1249,7 @@ class TestOps(unittest.TestCase):
self.helper_test_exception([(4), (1,2)], lambda x, y: x.matmul(y), Tensor.dot, expected=RuntimeError)
self.helper_test_exception([(2,1), (4)], lambda x, y: x.matmul(y), Tensor.dot, expected=RuntimeError)
self.helper_test_exception([(1), (4)], lambda x, y: x.matmul(y), Tensor.dot, expected=RuntimeError)
@slow_test
def test_dot(self):
helper_test_op([(45,65), (65,100)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-5)
helper_test_op([(8,45,65), (8,65,100)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-5)
@@ -1298,6 +1310,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(64,64), (64,64)], lambda x,y: x.half().matmul(y.half()), atol=5e-3, rtol=5e-3)
def test_gemm(self):
  # 64x64 by 64x64 matmul; only one function is passed, so helper_test_op reuses it for the tinygrad side
  square = (64,64)
  helper_test_op([square, square], lambda x,y: x.matmul(y))
@slow_test
def test_big_gemm(self):
  # 256x256 by 256x256 matmul with atol=1e-4; only one function is passed, so helper_test_op reuses it for the tinygrad side
  square = (256,256)
  helper_test_op([square, square], lambda x,y: x.matmul(y), atol=1e-4)
@unittest.skipIf(IMAGE>0, "no 0 in shape matmul on images")
@@ -1309,12 +1322,14 @@ class TestOps(unittest.TestCase):
helper_test_op([(0,0), (0,0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
helper_test_op([(0), (0,8)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
helper_test_op([(0), (0)], lambda x,y: x.matmul(y), Tensor.dot, atol=1e-7)
@slow_test
def test_broadcastdot(self):
  # 3-D @ 2-D: the torch side uses the @ operator, the tinygrad side uses Tensor.dot
  shapes = [(10,45,65), (65,45)]
  helper_test_op(shapes, lambda x,y: x @ y, Tensor.dot, atol=1e-4)
  # multiplying a Tensor built from a bare float by a 3x3 tensor is expected to raise RuntimeError
  with self.assertRaises(RuntimeError):
    scalar, mat = Tensor(3.14), Tensor.ones(3,3)
    scalar @ mat
@slow_test
def test_multidot(self):
helper_test_op([(10,45,65), (10,65,45)], lambda x,y: x @ y, Tensor.dot, atol=1e-4)
helper_test_op([(3,3,45,65), (3,3,65,45)], lambda x,y: x @ y, Tensor.dot, atol=1e-4)
@@ -1449,6 +1464,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(15, 25, 35)], lambda x: x.var(correction=5))
# TODO: fix this
# helper_test_op([(10, 2)], lambda x: x.var(correction=50))
@slow_test
def test_var_axis(self):
helper_test_op([(15, 25, 35)], lambda x: x.var(0))
helper_test_op([(15, 25, 35)], lambda x: x.var(2))
@@ -1481,6 +1497,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(15, 25, 35)], lambda x: x.std())
helper_test_op([(15, 25, 35)], lambda x: x.std(correction=0))
helper_test_op([(15, 25, 35)], lambda x: x.std(correction=5))
@slow_test
def test_std_axis(self):
helper_test_op([(15, 25, 35)], lambda x: x.std(0))
helper_test_op([(15, 25, 35)], lambda x: x.std(2))
@@ -1557,6 +1574,7 @@ class TestOps(unittest.TestCase):
helper_test_op([()], lambda x: torch.logsumexp(x, dim=0), lambda x: x.logsumexp(0), atol=1e-7, grad_atol=1e-7)
helper_test_op([()], lambda x: torch.logsumexp(x, dim=-1), lambda x: x.logsumexp(-1), atol=1e-7, grad_atol=1e-7)
@slow_test
def test_logcumsumexp(self):
helper_test_op([(45,65)], lambda x: torch.logcumsumexp(x, dim=0), lambda x: x.logcumsumexp(0), atol=1e-7, grad_atol=1e-7)
helper_test_op([(45,65)], lambda x: torch.logcumsumexp(x, dim=1), lambda x: x.logcumsumexp(1), atol=1e-7, grad_atol=1e-7)
@@ -1626,6 +1644,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(45,65), (45,1)], lambda x,y: x/y)
helper_test_op([(45,65), ()], lambda x,y: x/y)
@slow_test
def test_broadcast_partial(self):
for torch_op, tinygrad_op in [(torch.add, Tensor.add), (torch.sub, Tensor.sub), (torch.mul, Tensor.mul),
(torch.div, Tensor.div), (torch.pow, Tensor.pow)]:
@@ -2048,6 +2067,7 @@ class TestOps(unittest.TestCase):
lambda x,w: torch.nn.functional.conv2d(x,w),
lambda x,w: Tensor.conv2d(x,w), grad_rtol=1e-5)
@slow_test
def test_nested_conv2d(self):
helper_test_op([(1,32,9,9), (32,32,3,3), (32,32,3,3)],
lambda x,w1,w2: torch.nn.functional.conv2d(torch.nn.functional.conv2d(x,w1).relu(), w2),
@@ -2108,6 +2128,7 @@ class TestOps(unittest.TestCase):
lambda x,w,b: torch.nn.functional.conv_transpose2d(x,w,b,output_padding=output_padding,stride=stride),
lambda x,w,b: Tensor.conv_transpose2d(x,w,b,output_padding=output_padding,stride=stride), grad_rtol=1e-5)
@slow_test
@unittest.skipIf(IMAGE>0, "no conv3d on images")
def test_simple_conv_transpose3d(self):
helper_test_op([(2,4,9,9,9), (4,4,3,3,3)],
@@ -2177,6 +2198,7 @@ class TestOps(unittest.TestCase):
self.helper_test_exception([(2,16,2,2), (32,16,3,3)], lambda x,w:torch.nn.functional.conv2d(x,w,padding=(1,1,1)),
lambda x,w: Tensor.conv2d(x,w,padding=(1,1,1)), expected=(RuntimeError, ValueError))
@slow_test
def test_large_input_conv2d(self):
bs = 4
cin = 16
@@ -2329,6 +2351,7 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.max_pool2d(x, kernel_size=ksz),
lambda x: Tensor.max_pool2d(x, kernel_size=ksz))
@slow_test
def test_max_pool2d(self):
for ksz in [(2,2), (3,3), 2, 3, (3,2), (5,5), (5,1)]:
with self.subTest(kernel_size=ksz):
@@ -2336,6 +2359,7 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.max_pool2d(x, kernel_size=ksz),
lambda x: Tensor.max_pool2d(x, kernel_size=ksz))
@slow_test
def test_max_pool2d_padding(self):
for ksz in [(2,2), (3,3), 2, 3, (3,2)]:
for p in [1, (1,0), (0,1)]:
@@ -2346,6 +2370,7 @@ class TestOps(unittest.TestCase):
self.helper_test_exception([(4,2,110,28)], lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(2,2), padding=(1,1,1)),
lambda x: Tensor.max_pool2d(x, kernel_size=(2,2), padding=(1,1,1)), expected=(RuntimeError, ValueError))
@slow_test
def test_max_pool2d_asymmetric_padding(self):
for p in [(0,1,0,1), (2,1,2,1), (2,0,2,1)]:
with self.subTest(padding=p):
@@ -2353,12 +2378,14 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.max_pool2d(torch.nn.functional.pad(x, p, value=float("-inf")), kernel_size=(5,5)),
lambda x: Tensor.max_pool2d(x, kernel_size=(5,5), padding=p))
@slow_test
def test_max_pool2d_padding_int(self):
  # forward-only comparison of max_pool2d on inputs cast with .int(), using a (2,2) kernel and padding=1
  pool_kwargs = {"kernel_size": (2,2), "padding": 1}
  helper_test_op([(4,2,11,28)],
    lambda x: torch.nn.functional.max_pool2d(x.int(), **pool_kwargs),
    lambda x: Tensor.max_pool2d(x.int(), **pool_kwargs), forward_only=True)
@slow_test
def test_max_pool2d_bigger_stride(self):
for stride in [(2,3), (3,2), 2, 3]:
with self.subTest(stride=stride):
@@ -2366,6 +2393,7 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(2,2), stride=stride),
lambda x: Tensor.max_pool2d(x, kernel_size=(2,2), stride=stride))
@slow_test
def test_max_pool2d_bigger_stride_dilation(self):
for stride, dilation in zip([(2,3), (3,2), 2, 3, 4], [(3,2), (2,3), 2, 3, 6]):
with self.subTest(stride=stride):
@@ -2379,6 +2407,7 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(5,5), stride=1),
lambda x: Tensor.max_pool2d(x, kernel_size=(5,5), stride=1))
@slow_test
def test_max_pool2d_smaller_stride(self):
for stride in [(2,3), (3,2), 2, 3]:
with self.subTest(stride=stride):
@@ -2386,6 +2415,7 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.max_pool2d(x, kernel_size=(5,5), stride=stride),
lambda x: Tensor.max_pool2d(x, kernel_size=(5,5), stride=stride))
@slow_test
def test_max_pool2d_dilation(self):
for dilation in [(2, 3), (3, 2), 2, 3]:
helper_test_op([(3, 2, 17, 14)],
@@ -2440,6 +2470,7 @@ class TestOps(unittest.TestCase):
lambda x: Tensor.max_pool2d(x, kernel_size=(2,2), stride=1, return_indices=True)[1],
vals=[[[[[1,2]*3]*6]]], forward_only=True) # Tensor([1,2,1,2,1,2]).expand(1,1,6,6)
@slow_test
def test_max_unpool2d(self):
args = {"kernel_size":(5,5), "stride":(6,5)}
helper_test_op([(8,3,50,50)],
@@ -2483,6 +2514,7 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=(1,2), padding=(0,1), stride=(5,1)),
lambda x: Tensor.avg_pool2d(x, kernel_size=(1,2), padding=(0,1), stride=(5,1)), rtol=1e-5)
@slow_test
def test_avg_pool2d_padding(self):
shape = (32,2,111,28)
for ksz in [(2,2), (3,3), 2, 3, (3,2)]:
@@ -2504,6 +2536,7 @@ class TestOps(unittest.TestCase):
self.helper_test_exception([shape], lambda x: torch.nn.functional.avg_pool2d(x, kernel_size=(2,2), padding=(1,1,1)),
lambda x: Tensor.avg_pool2d(x, kernel_size=(2,2), padding=(1,1,1)), expected=(RuntimeError, ValueError))
@slow_test
def test_avg_pool2d_padding_not_counted(self):
shape = (32,2,111,28)
for ksz in [(2,2), (3,3), 2, 3, (3,2)]:
@@ -2585,12 +2618,14 @@ class TestOps(unittest.TestCase):
def test_interpolate_nearest_exact(self):
  # re-run test_interpolate_nearest with the mode argument set to "nearest-exact"
  self.test_interpolate_nearest("nearest-exact")
@slow_test
def test_interpolate_bilinear(self):
  # torch's mode="bilinear" is compared against Tensor.interpolate with mode="linear" for each (input, output) size pair
  size_pairs = [((12,20),(9,31)), ((12,9),(31,20)), ((9,31),(20,12))]
  for src_hw, dst_hw in size_pairs:
    helper_test_op([(2,3)+src_hw],
      lambda x: torch.nn.functional.interpolate(x, size=dst_hw, mode="bilinear"),
      lambda x: Tensor.interpolate(x, size=dst_hw, mode="linear"), atol=1e-4)
@slow_test
def test_interpolate_bilinear_corners_aligned(self):
for in_sz, out_sz in [((12,20),(9,31)), ((12,9),(31,20)), ((9,31),(20,12))]:
helper_test_op([(2,3)+in_sz],
@@ -2609,6 +2644,7 @@ class TestOps(unittest.TestCase):
lambda x: torch.nn.functional.interpolate(x, size=out_sz, mode="trilinear", align_corners=True),
lambda x: Tensor.interpolate(x, size=out_sz, mode="linear", align_corners=True), atol=1e-4)
@slow_test
def test_cat(self):
for dim in range(-2, 3):
helper_test_op([(45,65,9), (45,65,9), (45,65,9)], lambda x,y,z: torch.cat((x,y,z), dim), lambda x,y,z: x.cat(y, z, dim=dim))
@@ -2707,6 +2743,7 @@ class TestOps(unittest.TestCase):
data = [math.inf, -math.inf, math.nan]
helper_test_op((), lambda: torch.tensor(data)[torch.tensor([0, 1, 2])], lambda: Tensor(data)[Tensor([0, 1, 2])])
@slow_test
def test_slice_fancy_indexing_no_dim_collapse(self):
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
# no dim collapse from int or dim injection from None
@@ -2716,6 +2753,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,...,e], lambda x: x[i,...,p])
helper_test_op([(2,5,6,5,3,4)], lambda x: x[...,c,:,e], lambda x: x[...,k,:,p])
@slow_test
def test_slice_fancy_indexing_dim_collapse_int(self):
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
# dim collapse from int
@@ -2725,6 +2763,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,2,2,2,e], lambda x: x[i,2,2,2,p])
helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,:,3:11:2,d,0:2], lambda x: x[1,:,3:11:2,o,0:2])
@slow_test
def test_slice_fancy_indexing_dim_inject_none(self):
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
# dim injection from None
@@ -2758,6 +2797,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(2,3)], lambda x: x[torch.tensor([[0,1,-1],[-1,-2,0]]), torch.tensor([2,1,-1])],
lambda x: x[Tensor([[0,1,-1],[-1,-2,0]]), Tensor([2,1,-1])])
@slow_test
def test_slice_fancy_indexing_list_indices(self):
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
helper_test_op([(2,5,6,5,3,4)], lambda x: x[[[0]]], lambda x: x[[[0]]])
@@ -2768,6 +2808,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,b,c,[[1],[2],[3]],...], lambda x: x[i,j,k,[[1],[2],[3]],...])
helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,[2,1,0],c,[-2,1,0],e], lambda x: x[i,[2,1,0],k,[-2,1,0],p])
@slow_test
def test_slice_fancy_indexing_tuple_indices(self):
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
helper_test_op([(2,5,6,5,3,4)], lambda x: x[(((0,),),)], lambda x: x[(((0,),),)])
@@ -2777,6 +2818,7 @@ class TestOps(unittest.TestCase):
helper_test_op([(2,5,6,5,3,4)], lambda x: x[a,((2,),(1,),(0,)),c,(2,1,0)], lambda x: x[i,((2,),(1,),(0,)),k,(2,1,0)])
helper_test_op([(2,5,6,5,3,4)], lambda x: x[1,(2,1,0),None,c,(2,1,0),e], lambda x: x[1,(2,1,0),None,k,(2,1,0),p])
@slow_test
def test_slice_fancy_indexing_list_with_tensors(self):
a,b,c,d,e,i,j,k,o,p = self._get_index_randoms()
helper_test_op([(2,5,6,5,3,4)], lambda x: x[[a]], lambda x: x[[i]])
@@ -2883,6 +2925,7 @@ class TestOps(unittest.TestCase):
with self.assertRaises(TypeError):
Tensor.ones(4).scatter(dim=1, index=Tensor([0]), src=Tensor.ones(4), reduce="add")
@slow_test
def test_scatter_reduce(self):
b = torch.randint(3, size=[3,4,5], dtype=torch.int64, requires_grad=False)
a = Tensor(b.detach().cpu().numpy().astype(np.int32), dtype=dtypes.int32, requires_grad=False)
@@ -2918,15 +2961,18 @@ class TestOps(unittest.TestCase):
lambda x,src: x.half().scatter_reduce(dim=0, index=a, src=src, reduce="sum"),
RuntimeError)
@slow_test
def test_scaled_dot_product_attention(self):
  # three identically shaped inputs, first without a mask and then with an extra (32,8,16,16) input passed as attn_mask
  qkv = (32,8,16,64)
  helper_test_op([qkv, qkv, qkv], torch.nn.functional.scaled_dot_product_attention, Tensor.scaled_dot_product_attention)
  helper_test_op([qkv, qkv, qkv, (32,8,16,16)],
    lambda x,y,z,m: torch.nn.functional.scaled_dot_product_attention(x,y,z,attn_mask=m),
    lambda x,y,z,m: Tensor.scaled_dot_product_attention(x,y,z,attn_mask=m))
@slow_test
def test_scaled_dot_product_attention_mismatch_ls(self):
  # the first input has 4 along dim 2 while the other two have 16
  # NOTE(review): "mismatch_ls" presumably refers to differing sequence lengths — confirm
  first, rest = (32,8,4,64), (32,8,16,64)
  helper_test_op([first, rest, rest], torch.nn.functional.scaled_dot_product_attention, Tensor.scaled_dot_product_attention)
@slow_test
def test_scaled_dot_product_attention_causal(self):
helper_test_op([(32,8,16,64), (32,8,16,64), (32,8,16,64)],
lambda x,y,z: torch.nn.functional.scaled_dot_product_attention(x,y,z,is_causal=True),
@@ -2937,6 +2983,7 @@ class TestOps(unittest.TestCase):
lambda x,y,z,m: Tensor.scaled_dot_product_attention(x,y,z,is_causal=True,attn_mask=m),
expected=RuntimeError)
@slow_test
def test_scaled_dot_product_attention_gqa(self):
helper_test_op([(32,32,16,64), (32,8,16,64), (32,8,16,64)],
lambda x,y,z: torch.nn.functional.scaled_dot_product_attention(x,y,z,enable_gqa=True),