From 93bc3c22a031bb6aa6982a3e9331ad088f677553 Mon Sep 17 00:00:00 2001 From: 0xNaN Date: Wed, 21 Oct 2020 00:19:33 +0200 Subject: [PATCH] tiny gradcheck --- test/test_tensor.py | 44 ++++++++++++++++++++++++- tinygrad/gradcheck.py | 75 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 tinygrad/gradcheck.py diff --git a/test/test_tensor.py b/test/test_tensor.py index 59eae21a6a..8af79b73b6 100644 --- a/test/test_tensor.py +++ b/test/test_tensor.py @@ -2,6 +2,7 @@ import numpy as np import torch import unittest from tinygrad.tensor import Tensor, Conv2D +from tinygrad.gradcheck import numerical_jacobian, gradcheck x_init = np.random.randn(1,3).astype(np.float32) W_init = np.random.randn(3,3).astype(np.float32) @@ -32,6 +33,47 @@ class TestTinygrad(unittest.TestCase): for x,y in zip(test_tinygrad(), test_pytorch()): np.testing.assert_allclose(x, y, atol=1e-5) + def test_gradcheck(self): + class TinyModel: + def __init__(self, weights_init): + self.l1 = Tensor(weights_init) + def forward(self, x): + return x.dot(self.l1).relu().logsoftmax() + + class TorchModel(torch.nn.Module): + def __init__(self, weights_init): + super(TorchModel, self).__init__() + self.l1 = torch.nn.Linear(*weights_init.shape, bias = False) + self.l1.weight = torch.nn.Parameter(torch.tensor(weights_init.T, requires_grad = True)) + def forward(self, x): + return torch.nn.functional.log_softmax(self.l1(x).relu(), dim=1) + + layer_weights = np.random.RandomState(1337).random((10, 5)) + input_data = np.random.RandomState(7331).random((1, 10)) - 0.5 + + torch_input = torch.tensor(input_data, requires_grad = True) + torch_model = TorchModel(layer_weights) + torch_out = torch_model(torch_input) + # autograd.grad computes the _sum_ of gradients of given tensors + J_sum = torch.autograd.grad(list(torch_out[0]), torch_input)[0].squeeze().numpy() + + tiny_model = TinyModel(layer_weights) + tiny_input = Tensor(input_data) + tiny_out = tiny_model.forward(tiny_input) + NJ = numerical_jacobian(tiny_model, tiny_input) + NJ_sum = NJ.sum(axis = -1) + + # checking the numerical approx. of J is close to the one provided autograd + np.testing.assert_allclose(J_sum, NJ_sum, atol = 1e-5) + + # test gradcheck + gradcheck_test, _, _ = gradcheck(tiny_model, tiny_input) + self.assertTrue(gradcheck_test) + + # coarse approx. since a "big" eps and the non-linearities of the model + gradcheck_test, j, nj = gradcheck(tiny_model, tiny_input, eps = 0.1) + self.assertFalse(gradcheck_test) + def test_conv2d(self): x = torch.randn((5,2,10,7), requires_grad=True) w = torch.randn((4,2,3,3), requires_grad=True) @@ -48,7 +90,7 @@ class TestTinygrad(unittest.TestCase): np.testing.assert_allclose(w.grad, wt.grad, atol=1e-5) np.testing.assert_allclose(x.grad, xt.grad, atol=1e-5) - + if __name__ == '__main__': unittest.main() diff --git a/tinygrad/gradcheck.py b/tinygrad/gradcheck.py new file mode 100644 index 0000000000..d7991a7542 --- /dev/null +++ b/tinygrad/gradcheck.py @@ -0,0 +1,75 @@ +import numpy as np +from tinygrad.tensor import Tensor + +def mask_like(like, mask_inx, mask_value = 1.0): + mask = np.zeros_like(like).reshape(-1) + mask[mask_inx] = mask_value + return mask.reshape(like.shape) + +def numerical_jacobian(model, input, eps = 1e-6): + """ + Compute the Jacobian through Finite-Difference Approximation. + Somewhat inspired by [1] but not followed closely. + + model : A tinygrad model + input : An input + eps : Perturbation step + + returns: + + NJ : an approx. of the Jacobian + + [1]: https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/ + """ + output = model.forward(input) + + ji = input.data.reshape(-1).shape[-1] + jo = output.data.reshape(-1).shape[-1] + NJ = np.zeros((ji, jo)) + + for i in range(ji): + for o in range(jo): + + eps_pertub = mask_like(input.data, i, mask_value = eps) + output_perturb_add = model.forward(Tensor(input.data + eps_pertub)).data.reshape(-1)[o] + output_perturb_sub = model.forward(Tensor(input.data - eps_pertub)).data.reshape(-1)[o] + + grad_approx = ((output_perturb_add) - (output_perturb_sub)) / (2*eps) + + NJ[i,o] = grad_approx + return NJ + +def gradcheck(model, input, eps = 1e-06, atol = 1e-5, rtol = 0.001): + """ + Checks whether the numerical approx. of the Jacobian of model w.r.t input is close to the + analitical one (computed through .backward()) + + model : A tinygrad model + input : An input + eps : Perturbation step + atol, rtol: Params for the numpy.allclose test + + returns: + test_passed : Bool, whether the test passed + J : Analytical Jacobian + NJ : Finite-Difference approx. Jacobian + """ + NJ = numerical_jacobian(model, input, eps) + + output = model.forward(input) + + ji = input.data.reshape(-1).shape[-1] + jo = output.data.reshape(-1).shape[-1] + J = np.zeros((ji, jo)) + + for o in range(jo): + # tinygrad doesn't support slicing, tiny-hack to select + # the needed scalar an backpropagate only through it + o_scalar = Tensor(mask_like(output.data, o, 1.)).mul(output).sum() + o_scalar.backward() + + for i, grad in enumerate(input.grad.reshape(-1)): + J[i][o] = grad + + test_passed = np.allclose(J, NJ, atol=atol, rtol=rtol) + return test_passed, J, NJ