diff --git a/tinygrad/gradcheck.py b/tinygrad/gradcheck.py
index 6511634fb9..1cf959f374 100644
--- a/tinygrad/gradcheck.py
+++ b/tinygrad/gradcheck.py
@@ -16,7 +16,7 @@ def jacobian(model, input):
 
   ji = input.data.reshape(-1).shape[-1]
   jo = output.data.reshape(-1).shape[-1]
-  J = np.zeros((ji, jo))
+  J = np.zeros((jo,ji))
 
   for o in range(jo):
     # tinygrad doesn't support slicing, tiny-hack to select
@@ -25,7 +25,7 @@ def jacobian(model, input):
     o_scalar.backward()
 
     for i, grad in enumerate(input.grad.reshape(-1)):
-      J[i][o] = grad
+      J[o,i] = grad
   return J
 
 def mask_like(like, mask_inx, mask_value = 1.0):
@@ -52,10 +52,10 @@ def numerical_jacobian(model, input, eps = 1e-6):
 
   ji = input.data.reshape(-1).shape[-1]
  jo = output.data.reshape(-1).shape[-1]
-  NJ = np.zeros((ji, jo))
+  NJ = np.zeros((jo, ji))
 
-  for i in range(ji):
-    for o in range(jo):
+  for o in range(jo):
+    for i in range(ji):
       eps_perturb = mask_like(input.data, i, mask_value = eps)
 
       output_perturb_add = model(Tensor(input.data + eps_perturb)).data.reshape(-1)[o]
@@ -63,7 +63,7 @@ def numerical_jacobian(model, input, eps = 1e-6):
 
      grad_approx = ((output_perturb_add) - (output_perturb_sub)) / (2*eps)
 
-      NJ[i,o] = grad_approx
+      NJ[o,i] = grad_approx
   return NJ
 
 def gradcheck(model, input, eps = 1e-06, atol = 1e-5, rtol = 0.001):