import numpy as np
import torch
import unittest
from tinygrad.tensor import Tensor
from tinygrad.gradcheck import numerical_jacobian, jacobian, gradcheck

x_init = np.random.randn(1,3).astype(np.float32)
W_init = np.random.randn(3,3).astype(np.float32)
m_init = np.random.randn(1,3).astype(np.float32)

class TestTinygrad(unittest.TestCase):
  def test_backward_pass(self):
    def test_tinygrad():
      x = Tensor(x_init)
      W = Tensor(W_init)
      m = Tensor(m_init)
      out = x.dot(W).relu()
      out = out.logsoftmax()
      out = out.mul(m).add(m).sum()
      out.backward()
      return out.data, x.grad, W.grad

    def test_pytorch():
      x = torch.tensor(x_init, requires_grad=True)
      W = torch.tensor(W_init, requires_grad=True)
      m = torch.tensor(m_init)
      out = x.matmul(W).relu()
      out = torch.nn.functional.log_softmax(out, dim=1)
      out = out.mul(m).add(m).sum()
      out.backward()
      return out.detach().numpy(), x.grad, W.grad

    # the same computation in tinygrad and torch must agree in both the
    # forward value and the gradients
    for x,y in zip(test_tinygrad(), test_pytorch()):
      np.testing.assert_allclose(x, y, atol=1e-5)

  def test_jacobian(self):
    W = np.random.RandomState(1337).random((10, 5))
    x = np.random.RandomState(7331).random((1, 10)) - 0.5

    torch_x = torch.tensor(x, requires_grad=True)
    torch_W = torch.tensor(W, requires_grad=True)
    torch_func = lambda x: torch.nn.functional.log_softmax(x.matmul(torch_W).relu(), dim=1)
    PJ = torch.autograd.functional.jacobian(torch_func, torch_x).squeeze().numpy()

    tiny_x = Tensor(x)
    tiny_W = Tensor(W)
    tiny_func = lambda x: x.dot(tiny_W).relu().logsoftmax()
    J = jacobian(tiny_func, tiny_x)
    NJ = numerical_jacobian(tiny_func, tiny_x)

    # both the analytical and the numerical jacobian should match torch's
    np.testing.assert_allclose(PJ, J, atol=1e-5)
    np.testing.assert_allclose(PJ, NJ, atol=1e-5)

  def test_gradcheck(self):
    W = np.random.RandomState(1337).random((10, 5))
    x = np.random.RandomState(7331).random((1, 10)) - 0.5

    tiny_x = Tensor(x)
    tiny_W = Tensor(W)
    tiny_func = lambda x: x.dot(tiny_W).relu().logsoftmax()
    self.assertTrue(gradcheck(tiny_func, tiny_x))

    # with a "big" eps the numerical jacobian is too coarse an approximation
    # around the model's non-linearities, so gradcheck should fail
    self.assertFalse(gradcheck(tiny_func, tiny_x, eps=0.1))

class TestOps(unittest.TestCase):
  def test_conv2d(self):
    for cin in [1,2,3]:
      for H in [2,3,5]:
        for W in [2,3,5]:
          x = torch.randn((5,cin,10,7), requires_grad=True)
          w = torch.randn((4,cin,H,W), requires_grad=True)
          xt = Tensor(x.detach().numpy())
          wt = Tensor(w.detach().numpy())

          out = torch.nn.functional.conv2d(x,w)
          ret = Tensor.conv2d(xt, wt)
          np.testing.assert_allclose(ret.data, out.detach().numpy(), atol=1e-5)

          out.mean().backward()
          ret.mean().backward()
          np.testing.assert_allclose(w.grad, wt.grad, atol=1e-7)
          np.testing.assert_allclose(x.grad, xt.grad, atol=1e-7)

  def test_maxpool2x2(self):
    x = torch.randn((5,2,10,8), requires_grad=True)
    xt = Tensor(x.detach().numpy())

    # in tinygrad
    ret = xt.maxpool2x2()
    assert ret.shape == (5,2,10//2,8//2)
    ret.mean().backward()

    # in torch
    out = torch.nn.MaxPool2d((2,2))(x)
    out.mean().backward()

    # forward and backward the same
    np.testing.assert_allclose(ret.data, out.detach().numpy(), atol=1e-5)
    np.testing.assert_allclose(x.grad, xt.grad, atol=1e-5)
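# The op tests above repeat one pattern: mirror the inputs in torch and
# tinygrad, run the op in both, and compare the forward value and the
# gradients. Below is a minimal sketch of factoring that pattern into a
# helper. `helper_test_op` and `TestOpsHelper` are hypothetical names, not
# part of the suite above; the sketch assumes only the Tensor / .data /
# .grad / .mean APIs already exercised by these tests.
def helper_test_op(shapes, torch_fxn, tinygrad_fxn, atol=1e-5):
  # build matching torch and tinygrad inputs from the same random data
  ts = [torch.tensor(np.random.randn(*s).astype(np.float32), requires_grad=True) for s in shapes]
  tst = [Tensor(t.detach().numpy()) for t in ts]

  out = torch_fxn(*ts)
  ret = tinygrad_fxn(*tst)

  # forward values must agree
  np.testing.assert_allclose(ret.data, out.detach().numpy(), atol=atol)

  # reduce to a scalar, backprop in both, compare every input gradient
  out.mean().backward()
  ret.mean().backward()
  for t, tt in zip(ts, tst):
    np.testing.assert_allclose(t.grad, tt.grad, atol=atol)

class TestOpsHelper(unittest.TestCase):
  def test_dot(self):
    helper_test_op([(1,3), (3,3)], lambda x,w: x.matmul(w), lambda x,w: x.dot(w))

if __name__ == '__main__':
  unittest.main()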