From 78210b5e40bcf0f238eb5ef649f87a9eb09bebcc Mon Sep 17 00:00:00 2001
From: James Roberts <37839840+jamesroberts@users.noreply.github.com>
Date: Mon, 14 Dec 2020 23:53:00 +0200
Subject: [PATCH] less lines (#197)

---
 tinygrad/ops_cpu.py | 28 +++++++++-------------------
 tinygrad/ops_gpu.py | 10 +++++-----
 tinygrad/tensor.py  | 23 +++++++++--------------
 3 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/tinygrad/ops_cpu.py b/tinygrad/ops_cpu.py
index 16e094c55e..99c74175b9 100644
--- a/tinygrad/ops_cpu.py
+++ b/tinygrad/ops_cpu.py
@@ -93,9 +93,7 @@ class Pad2D(Function):
   @staticmethod
   def forward(ctx, x, padding=None):
     ctx.save_for_backward(padding)
-    return np.pad(x,
-      ((0,0), (0,0),
-       (padding[2], padding[3]), (padding[0], padding[1])))
+    return np.pad(x, ((0,0), (0,0), tuple(padding[2:4]), tuple(padding[0:2])))
 
   @staticmethod
   def backward(ctx, grad_output):
@@ -127,8 +125,7 @@ class ReLU(Function):
   @staticmethod
   def backward(ctx, grad_output):
     input, = ctx.saved_tensors
-    grad_input = grad_output * (input >= 0)
-    return grad_input
+    return grad_output * (input >= 0)
 register('relu', ReLU)
 
 class Sigmoid(Function):
@@ -146,8 +143,7 @@ class Sigmoid(Function):
   @staticmethod
   def backward(ctx, grad_output):
     ret, = ctx.saved_tensors
-    grad_input = grad_output * (ret * (1 - ret))
-    return grad_input
+    return grad_output * (ret * (1 - ret))
 register('sigmoid', Sigmoid)
 
 class LogSoftmax(Function):
@@ -185,12 +181,10 @@ class Conv2D(Function):
 
     gx = x.reshape(bs,ctx.groups,cin,x.shape[2],x.shape[3])
     tx = np.lib.stride_tricks.as_strided(gx,
-        shape=(bs, ctx.groups, cin, oy, ox, H, W),
-        strides=(gx.strides[0], gx.strides[1], gx.strides[2],
-          gx.strides[3]*ys, gx.strides[4]*xs,
-          gx.strides[3], gx.strides[4]),
-        writeable=False,
-      )
+      shape=(bs, ctx.groups, cin, oy, ox, H, W),
+      strides=(*gx.strides[0:3], gx.strides[3]*ys, gx.strides[4]*xs, *gx.strides[3:5]),
+      writeable=False,
+    )
     tw = w.reshape(ctx.groups, rcout, cin, H, W)
     ctx.save_for_backward(tx, tw, x.shape)
 
@@ -258,9 +252,7 @@ class MaxPool2D(Function):
   @staticmethod
   def backward(ctx, grad_output):
     idxs,s = ctx.saved_tensors
-    return unstack_for_pool(
-      lambda idx: grad_output * (idxs == idx),
-      s, *ctx.kernel_size)
+    return unstack_for_pool(lambda idx: grad_output * (idxs == idx), s, *ctx.kernel_size)
 register('max_pool2d', MaxPool2D)
 
 class AvgPool2D(Function):
@@ -274,8 +266,6 @@ class AvgPool2D(Function):
   def backward(ctx, grad_output):
     s, = ctx.saved_tensors
     py, px = ctx.kernel_size
-    return unstack_for_pool(
-      lambda idx: grad_output/py/px,
-      s, py, px)
+    return unstack_for_pool(lambda idx: grad_output/py/px, s, py, px)
 register('avg_pool2d', AvgPool2D)
 
diff --git a/tinygrad/ops_gpu.py b/tinygrad/ops_gpu.py
index 9c7399269d..32c46c6fb3 100644
--- a/tinygrad/ops_gpu.py
+++ b/tinygrad/ops_gpu.py
@@ -346,8 +346,7 @@ class Reshape(Function):
   @staticmethod
   def backward(ctx, grad_output):
     in_shape, = ctx.saved_tensors
-    grad_output = GPUBuffer(in_shape, hostbuf=grad_output)
-    return grad_output
+    return GPUBuffer(in_shape, hostbuf=grad_output)
 register('reshape', Reshape, device=Tensor.GPU)
 
 # ************* activation ops *************
@@ -449,6 +448,10 @@ class Conv2D(Function):
     # output buffer
     ret = buffer_new(ctx, (bs, cout, oy, ox))
 
+    # input  = (bs, groups, cin, iy, ix)
+    # weight = (groups, rcout, cin, H, W)
+    # output = (bs, groups, rcout, oy, ox)
+
     conv = clbuild(ctx.cl_ctx, "conv", """
     __kernel void conv(__global const float *input, __global const float *weight, __global float *output,
       int H, int W, int groups, int rcout, int cin, int oy, int ox, int iy, int ix, int ys, int xs) {
@@ -462,9 +465,6 @@ class Conv2D(Function):
     int IY = Y*ys;
     int IX = X*xs;
 
-    // input  = (bs, groups, cin, iy, ix)
-    // weight = (groups, rcout, cin, H, W)
-    // output = (bs, groups, rcout, oy, ox)
     float acc = 0.0;
     for (int ci = 0; ci < cin; ci++) {
       for (int y = IY; y < IY+H; y++) {
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 386f072a11..730f0b13dd 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -8,9 +8,8 @@ from collections import defaultdict
 
 DEBUG = os.getenv("DEBUG", None) is not None
 if DEBUG:
-  import collections, atexit, time
-  debug_counts = collections.defaultdict(int)
-  debug_times = collections.defaultdict(float)
+  import atexit, time
+  debug_counts, debug_times = defaultdict(int), defaultdict(float)
   def print_debug_exit():
     for name, _ in sorted(debug_times.items(), key=lambda x: -x[1]):
       print(f"{name:>20} : {debug_counts[name]:>6} {debug_times[name]:>10.2f} ms")
@@ -88,9 +87,7 @@ class Tensor:
       Tensor.did_float_warning = True
 
     self.device = Tensor.CPU
-    self.data = data
-    self.grad = None
-    self.requires_grad = requires_grad
+    self.data, self.grad, self.requires_grad = data, None, requires_grad
 
     if gpu:
       self.cuda_()
@@ -157,12 +154,11 @@
       if len(t0._ctx.parents) == 1:
         grads = [grads]
       for t,g in zip(t0._ctx.parents, grads):
-        if g is None:
-          continue
-        assert g.shape == t.shape, \
-          f"grad shape must match tensor shape in {self._ctx!r}, {g.shape!r} != {t.shape!r}"
-        gt = Tensor(g, requires_grad=False)
-        t.grad = gt if t.grad is None else (t.grad + gt)
+        if g is not None:
+          assert g.shape == t.shape, \
+            f"grad shape must match tensor shape in {self._ctx!r}, {g.shape!r} != {t.shape!r}"
+          gt = Tensor(g, requires_grad=False)
+          t.grad = gt if t.grad is None else (t.grad + gt)
 
 # ***** tinygrad supports CPU and GPU *****
 
@@ -197,8 +193,7 @@
       if self.grad:
         ret.grad = self.grad.cuda()
       return ret
-    else:
-      return self
+    return self
 
   def ane(self):
     assert(not self.gpu)
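
Note: the sketch below is not part of the patch. It is a minimal check, assuming NumPy and
the shapes and argument conventions used by Pad2D and Conv2D above, that the condensed
one-liners are drop-in replacements for the multi-line expressions they collapse. The array
shapes, stride, and padding values are made up for illustration.

    import numpy as np

    # Pad2D: padding = (left, right, top, bottom); the last two axes of x are (height, width).
    x = np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4)
    padding = [1, 2, 3, 4]
    old = np.pad(x, ((0,0), (0,0),
                     (padding[2], padding[3]), (padding[0], padding[1])))
    new = np.pad(x, ((0,0), (0,0), tuple(padding[2:4]), tuple(padding[0:2])))
    assert (old == new).all()

    # Conv2D: the strides tuple handed to np.lib.stride_tricks.as_strided,
    # old element-by-element spelling vs. the new unpacked-slice spelling.
    ys, xs = 2, 2                                     # example stride
    gx = np.zeros((2, 1, 3, 8, 8), dtype=np.float32)  # (bs, groups, cin, iy, ix)
    old_strides = (gx.strides[0], gx.strides[1], gx.strides[2],
                   gx.strides[3]*ys, gx.strides[4]*xs,
                   gx.strides[3], gx.strides[4])
    new_strides = (*gx.strides[0:3], gx.strides[3]*ys, gx.strides[4]*xs, *gx.strides[3:5])
    assert old_strides == new_strides

Both asserts pass because tuple slicing preserves order, so unpacking gx.strides[0:3] and
gx.strides[3:5] reproduces exactly the elements the old code listed one by one.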