Mirror of https://github.com/tinygrad/tinygrad.git, synced 2026-04-29 03:00:14 -04:00
fix TINY_BACKEND=1 cumsum (#14138)
* fix TINY_BACKEND=1 cumsum: the old hack was wrong; contiguous needs to be applied to the input
* test time
* test_linalg_svd is slow
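For context, a minimal repro sketch of the failure this commit addresses. It assumes tinygrad's torch backend registers a "tiny" device when extra/torch_backend/backend.py is imported, and that the 512-element split threshold from the TODO comments below applies:

import torch
import extra.torch_backend.backend  # noqa: F401  (assumed to register the "tiny" device)

# An unrealized arange longer than 512 elements, the split threshold.
# Before this fix, cumsum on such an input tripped the CFGContext circular
# dependency assertion; the fix applies contiguous to the input first.
a = torch.arange(513, dtype=torch.float32, device="tiny")
out = torch.cumsum(a, dim=0)
print(out.cpu()[-1])  # sum of 0..512 = 131328.0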
.github/workflows/test.yml (vendored)
@@ -110,7 +110,7 @@ jobs:
     - name: Test ResNet-18
       run: DEBUG=2 python3 extra/torch_backend/example.py
     - name: custom tests
-      run: python3 extra/torch_backend/test.py
+      run: python3 -m pytest -n auto extra/torch_backend/test.py --durations=20
     - name: Test one op in torch tests
       run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32
     - name: Test Ops with TINY_BACKEND
@@ -630,7 +630,8 @@ tiny_backend = {**{k:wrap_out(v) for k,v in tiny_backend_out.items()}, **{
     Tensor.linspace(start, stop, steps, **({"dtype": _from_torch_dtype(dtype)} if dtype is not None else {})),
   "aten.topk": Tensor.topk,
   "aten.constant_pad_nd": lambda self, padding, value=0.0: self.pad(padding, mode="constant", value=value).contiguous(),
-  "aten.cumsum": lambda self, dim: self.cumsum(dim).contiguous(), # TODO: fix test_simple_cumsum, fails without contiguous for shapes >512
+  # TODO: input contiguous is needed to prevent CFGContext circular dependency assertion for shapes >512 (see test_cumsum_arange_large)
+  "aten.cumsum": lambda self, dim: self.contiguous().cumsum(dim),
   "aten.logsumexp": lambda self, axis, keepdim=False: self.logsumexp(axis[0], keepdim=keepdim),
   "aten.roll": Tensor.roll,
   "aten.logcumsumexp": Tensor.logcumsumexp,
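Restating the crux of the change outside the diff: tinygrad tensors are lazy, and .contiguous() marks where a buffer must be realized. The old mapping realized the cumsum output; the fix realizes the input before the cumsum is built. A sketch of the two shapes of the mapping (a hypothetical snippet, not code from the repo; the reasoning is taken from the TODO comment above):

from tinygrad import Tensor

a = Tensor.arange(513)  # lazy, unrealized, above the 512 split threshold

old_map = lambda self, dim: self.cumsum(dim).contiguous()  # contiguous on the output
new_map = lambda self, dim: self.contiguous().cumsum(dim)  # contiguous on the input

# With the input realized first, cumsum no longer sees the unrealized
# arange, which (per the TODO above) avoids the CFGContext circular dependency.
out = new_map(a, 0)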
@@ -191,6 +191,7 @@ class TestTorchBackend(unittest.TestCase):
     assert torch.equal(tensor_a, tensor_b)
     assert not torch.equal(tensor_a, tensor_c)
 
+  @unittest.skip("# TODO: this test is slow")
   def test_linalg_svd(self):
     A = torch.randn(5, 5, device=device)
     U, S, Vh = torch.linalg.svd(A)
@@ -699,6 +700,15 @@ class TestTorchBackend(unittest.TestCase):
     expected = np.array([4.0, 3.0, 2.0, 1.0])
     np.testing.assert_allclose(a.grad.cpu().numpy(), expected, rtol=1e-5)
 
+  def test_cumsum_arange_large(self):
+    # Tests cumsum with an unrealized arange input with size > 512 (the split threshold)
+    # This exercises the _split_cumalu path which uses a two-stage algorithm
+    for size in [513, 1022]:
+      a = torch.arange(size, dtype=torch.float32, device=device)
+      result = torch.cumsum(a, dim=0)
+      expected = torch.arange(size, dtype=torch.float32).cumsum(dim=0)
+      np.testing.assert_allclose(result.cpu().numpy(), expected.numpy(), rtol=1e-5)
+
   def test_diag_1d_to_2d(self):
     a = torch.tensor([1.0, 2.0, 3.0], dtype=torch.float32, device=device, requires_grad=True)
     b = torch.diag(a)
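For readers unfamiliar with the _split_cumalu path that the new test exercises: the generic two-stage trick is to cumsum fixed-size blocks independently, then add each block's running total as a carry into all later blocks. An illustrative numpy version of the idea (a sketch, not tinygrad's actual kernel code; the 512 block size mirrors the threshold mentioned in the diff):

import numpy as np

def split_cumsum(x: np.ndarray, block: int = 512) -> np.ndarray:
  n = len(x)
  blocks = np.pad(x, (0, (-n) % block)).reshape(-1, block)
  local = np.cumsum(blocks, axis=1)                  # stage 1: per-block cumsum
  carry = np.concatenate(([0], np.cumsum(local[:-1, -1])))
  return (local + carry[:, None]).reshape(-1)[:n]    # stage 2: add block carries

x = np.arange(1022, dtype=np.float32)
assert np.allclose(split_cumsum(x), np.cumsum(x))  # matches the reference cumsum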