diff --git a/tinygrad/gradcheck.py b/tinygrad/gradcheck.py
index 6511634fb9..1cf959f374 100644
--- a/tinygrad/gradcheck.py
+++ b/tinygrad/gradcheck.py
@@ -16,7 +16,7 @@ def jacobian(model, input):
 
   ji = input.data.reshape(-1).shape[-1]
   jo = output.data.reshape(-1).shape[-1]
-  J = np.zeros((ji, jo))
+  J = np.zeros((jo,ji))
 
   for o in range(jo):
     # tinygrad doesn't support slicing, tiny-hack to select
@@ -25,7 +25,7 @@ def jacobian(model, input):
     o_scalar.backward()
 
     for i, grad in enumerate(input.grad.reshape(-1)):
-      J[i][o] = grad
+      J[o,i] = grad
   return J
 
 def mask_like(like, mask_inx, mask_value = 1.0):
@@ -52,10 +52,10 @@ def numerical_jacobian(model, input, eps = 1e-6):
 
   ji = input.data.reshape(-1).shape[-1]
  jo = output.data.reshape(-1).shape[-1]
-  NJ = np.zeros((ji, jo))
+  NJ = np.zeros((jo, ji))
 
-  for i in range(ji):
-    for o in range(jo):
+  for o in range(jo):
+    for i in range(ji):
       eps_perturb = mask_like(input.data, i, mask_value = eps)
 
       output_perturb_add = model(Tensor(input.data + eps_perturb)).data.reshape(-1)[o]
@@ -63,7 +63,7 @@ def numerical_jacobian(model, input, eps = 1e-6):
 
      grad_approx = ((output_perturb_add) - (output_perturb_sub)) / (2*eps)
 
-      NJ[i,o] = grad_approx
+      NJ[o,i] = grad_approx
   return NJ
 
 def gradcheck(model, input, eps = 1e-06, atol = 1e-5, rtol = 0.001):