diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9c8dcd265c..50c1dc0dd9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -110,7 +110,7 @@ jobs: - name: Test ResNet-18 run: DEBUG=2 python3 extra/torch_backend/example.py - name: custom tests - run: python3 extra/torch_backend/test.py + run: python3 -m pytest -n auto extra/torch_backend/test.py --durations=20 - name: Test one op in torch tests run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32 - name: Test Ops with TINY_BACKEND diff --git a/extra/torch_backend/backend.py b/extra/torch_backend/backend.py index 2826428b5a..c1e145f923 100644 --- a/extra/torch_backend/backend.py +++ b/extra/torch_backend/backend.py @@ -630,7 +630,8 @@ tiny_backend = {**{k:wrap_out(v) for k,v in tiny_backend_out.items()}, **{ Tensor.linspace(start, stop, steps, **({"dtype": _from_torch_dtype(dtype)} if dtype is not None else {})), "aten.topk": Tensor.topk, "aten.constant_pad_nd": lambda self, padding, value=0.0: self.pad(padding, mode="constant", value=value).contiguous(), - "aten.cumsum": lambda self, dim: self.cumsum(dim).contiguous(), # TODO: fix test_simple_cumsum, fails without contiguous for shapes >512 + # TODO: making the input contiguous is needed to prevent the CFGContext circular dependency assertion for shapes >512 (see test_cumsum_arange_large) + "aten.cumsum": lambda self, dim: self.contiguous().cumsum(dim), "aten.logsumexp": lambda self, axis, keepdim=False: self.logsumexp(axis[0], keepdim=keepdim), "aten.roll": Tensor.roll, "aten.logcumsumexp": Tensor.logcumsumexp, diff --git a/extra/torch_backend/test.py b/extra/torch_backend/test.py index cde48acb18..b56e126d8e 100644 --- a/extra/torch_backend/test.py +++ b/extra/torch_backend/test.py @@ -191,6 +191,7 @@ class TestTorchBackend(unittest.TestCase): assert torch.equal(tensor_a, tensor_b) assert not torch.equal(tensor_a, tensor_c) + @unittest.skip("TODO: this test is slow") def 
test_linalg_svd(self): A = torch.randn(5, 5, device=device) U, S, Vh = torch.linalg.svd(A) @@ -699,6 +700,15 @@ class TestTorchBackend(unittest.TestCase): expected = np.array([4.0, 3.0, 2.0, 1.0]) np.testing.assert_allclose(a.grad.cpu().numpy(), expected, rtol=1e-5) + def test_cumsum_arange_large(self): + # Tests cumsum with an unrealized arange input with size > 512 (the split threshold) + # This exercises the _split_cumalu path which uses a two-stage algorithm + for size in [513, 1022]: + a = torch.arange(size, dtype=torch.float32, device=device) + result = torch.cumsum(a, dim=0) + expected = torch.arange(size, dtype=torch.float32).cumsum(dim=0) + np.testing.assert_allclose(result.cpu().numpy(), expected.numpy(), rtol=1e-5) + def test_diag_1d_to_2d(self): a = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32, device=device, requires_grad=True) b = torch.diag(a)