From 1b8c40234fd716af04274a22f57a2bcb76df736b Mon Sep 17 00:00:00 2001
From: George Hotz <72895+geohot@users.noreply.github.com>
Date: Wed, 23 Aug 2023 12:00:06 -0700
Subject: [PATCH] Uast start (#1650)

* work

* more tests

* more tests 2

* don't break it
---
 test/test_conv.py  | 41 +++++++++++++++++++++++------------------
 tinygrad/ops.py    |  5 ++---
 tinygrad/tensor.py |  2 +-
 3 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/test/test_conv.py b/test/test_conv.py
index cacb1a5f18..b37198b0c1 100644
--- a/test/test_conv.py
+++ b/test/test_conv.py
@@ -7,14 +7,19 @@ pytestmark = [pytest.mark.exclude_cuda]
 
 class TestConv(unittest.TestCase):
   def test_simple(self):
-    x = Tensor.ones(1,12,128,256)
-    w = Tensor.ones(32,12,3,3)
+    x = Tensor.ones(1,12,128,256).contiguous().realize()
+    w = Tensor.ones(32,12,3,3).contiguous().realize()
     ret = x.conv2d(w, stride=(2,2), padding=(1,1)).numpy()
     # it's not 108 around the padding
     assert (ret[:, :, 1:-1, 1:-1] == 108).all()
     assert ret[0,0,0,0] == 48
     assert ret[0,0,0,1] == 72
 
+  def test_simple_rand(self):
+    x = Tensor.rand(1,12,128,256)
+    w = Tensor.rand(32,12,3,3)
+    ret = x.conv2d(w, stride=(2,2), padding=(1,1)).numpy()
+
   def test_many_simple(self):
     x = Tensor(np.arange(8*2*8).reshape(1,8,2,8).astype(np.float32))
     #w = Tensor(np.arange(8*8*1*1).reshape(8,8,1,1).astype(np.float32))
@@ -24,15 +29,15 @@ class TestConv(unittest.TestCase):
 
   def test_lazycache(self):
     Tensor.no_grad = True
-    x = Tensor.zeros(1, 32)
-    y = Tensor.zeros(32)
+    x = Tensor.rand(1, 32)
+    y = Tensor.rand(32)
     out = x + y.reshape((1,32,1)).reshape((1,32)) + y.reshape((1,32,1)).reshape((1,32))
     out.numpy()
     Tensor.no_grad = False
 
   def test_simple_biased(self):
     C = 8
-    x = Tensor.zeros(1,C,5,5)
+    x = Tensor.rand(1,C,5,5)
     w = Tensor.eye(C).reshape((C,C,1,1))
     b = Tensor(np.arange(C).astype(np.float32))
     ret = Tensor.conv2d(x,w,b).relu().conv2d(w,b)
@@ -61,15 +66,15 @@ class TestConv(unittest.TestCase):
 
   def test_first_three(self):
     Tensor.no_grad = True
-    x = Tensor.ones(1,12,128,256)
+    x = Tensor.rand(1,12,128,256)
 
-    w = Tensor.ones(32,12,3,3)
+    w = Tensor.rand(32,12,3,3)
     x = x.conv2d(w, stride=(2,2), padding=(1,1)).elu()
 
-    w = Tensor.ones(32,1,3,3)
+    w = Tensor.rand(32,1,3,3)
     x = x.conv2d(w, padding=(1,1), groups=32).elu()
 
-    w = Tensor.ones(16,32,1,1)
+    w = Tensor.rand(16,32,1,1)
     x = x.conv2d(w).elu()
 
     x = x.numpy()
@@ -78,21 +83,21 @@ class TestConv(unittest.TestCase):
 
   def test_elu(self):
     Tensor.no_grad = True
-    x = Tensor.ones(1,12,128,256)
+    x = Tensor.rand(1,12,128,256)
 
-    w = Tensor.ones(32,12,3,3)
+    w = Tensor.rand(32,12,3,3)
     x = x.conv2d(w, stride=(2,2), padding=(1,1))
 
     x = x.elu()
 
-    w = Tensor.ones(32,1,3,3)
+    w = Tensor.rand(32,1,3,3)
     x = x.conv2d(w, padding=(1,1), groups=32)
     out = x.numpy()
     Tensor.no_grad = False
 
   def test_reduce_relu(self):
     Tensor.no_grad = True
-    x = Tensor.ones(1,12,128,256)
+    x = Tensor.rand(1,12,128,256)
     x = x.sum(keepdim=True).relu()
     out = x.numpy()
     Tensor.no_grad = False
@@ -100,7 +105,7 @@ class TestConv(unittest.TestCase):
   def test_bias(self):
     Tensor.no_grad = True
     from tinygrad.nn import Conv2d
-    x = Tensor.ones(1,12,128,256)
+    x = Tensor.rand(1,12,128,256)
     c = Conv2d(12, 32, 3)
     x = c(x).relu()
     w = Tensor.uniform(32, 1, 3, 3)
@@ -109,13 +114,13 @@ class TestConv(unittest.TestCase):
     Tensor.no_grad = False
 
   def test_multiadd(self):
-    w = Tensor.ones(32)
-    x = Tensor.ones(32).relu()
+    w = Tensor.rand(32)
+    x = Tensor.rand(32).relu()
     (w+x).numpy()
 
   def test_reorder(self):
-    x = Tensor.ones(1,12,128,256)
-    w = Tensor.ones(12,12,3,3)
+    x = Tensor.rand(1,12,128,256)
+    w = Tensor.rand(12,12,3,3)
     x = x.conv2d(w, padding=(1,1))
     print(x.shape)
     x = x.reshape((1, 12, 256, 128))
diff --git a/tinygrad/ops.py b/tinygrad/ops.py
index 6e35b97c64..ec226718f0 100644
--- a/tinygrad/ops.py
+++ b/tinygrad/ops.py
@@ -182,10 +182,9 @@ class Compiled:
     k.linearize()
     ret = self.renderer(k.function_name, k.uops)
     src, global_size, local_size, binary = ret if len(ret) == 4 else ret + (False,)
-    #TODO: I need to find a better way to select ARM64
     return ASTRunner(k.function_name, src, global_size, local_size,
-      op_estimate=k.info.flops, mem_estimate=k.mem_estimate,
-      display_name=k.display_name, runtime_args={"binary": binary}).build(self.runtime)
+                     op_estimate=k.info.flops, mem_estimate=k.mem_estimate,
+                     display_name=k.display_name, runtime_args={"binary": binary}).build(self.runtime)
 
   def exec_ast(self, ast:LazyOp, output, **kwargs):
     # all movementops do nothing in a Compiled buffer!
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 435620b1a8..0c83c1cb91 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -107,7 +107,7 @@ class Tensor:
     return self
 
   def detach(self): return Tensor(self.lazydata, device=self.device, requires_grad=False)
-  def numpy(self) -> np.ndarray: return self.to('CPU').lazydata.toCPU()
+  def numpy(self) -> np.ndarray: return self.lazydata.toCPU()
 
   # TODO: if things are realized this won't work
   def to_(self, device:str):