From a68ead09c00119f2af6541ae3b67ef027ac0863c Mon Sep 17 00:00:00 2001
From: George Hotz
Date: Wed, 21 Oct 2020 09:34:16 -0700
Subject: [PATCH 1/2] use numba to double conv speed

---
 test/mnist.py      |  2 +-
 tinygrad/tensor.py | 55 +++++++++++++++++++++++++++-------------------
 2 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/test/mnist.py b/test/mnist.py
index ec60d2bdef..67f35a2462 100644
--- a/test/mnist.py
+++ b/test/mnist.py
@@ -73,7 +73,7 @@ for i in (t := trange(steps)):
 
 # evaluate
 def numpy_eval():
-  Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28))))
+  Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28)).astype(np.float32)))
   Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)
   return (Y_test == Y_test_preds).mean()
 
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index eccede4bc1..c68ef70936 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -1,6 +1,7 @@
 # inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
 from functools import partialmethod
 import numpy as np
+from numba import jit, float32
 
 # **** start with two base classes ****
 
@@ -170,35 +171,43 @@ class LogSoftmax(Function):
     return grad_output - np.exp(output)*grad_output.sum(axis=1).reshape((-1, 1))
 register('logsoftmax', LogSoftmax)
 
+
+@jit(nopython=True)
+def conv2d_inner_forward(x, w):
+  cout,cin,H,W = w.shape
+  ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
+  for j in range(H):
+    for i in range(W):
+      tw = w[:, :, j, i]
+      for Y in range(ret.shape[2]):
+        for X in range(ret.shape[3]):
+          ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
+  return ret
+
+@jit(nopython=True)
+def conv2d_inner_backward(grad_output, x, w):
+  dx = np.zeros_like(x)
+  dw = np.zeros_like(w)
+  cout,cin,H,W = w.shape
+  for j in range(H):
+    for i in range(W):
+      tw = w[:, :, j, i]
+      for Y in range(grad_output.shape[2]):
+        for X in range(grad_output.shape[3]):
+          gg = grad_output[:, :, Y, X]
+          tx = x[:, :, Y+j, X+i]
+          dx[:, :, Y+j, X+i] += gg.dot(tw)
+          dw[:, :, j, i] += gg.T.dot(tx)
+  return dx, dw
+
 class Conv2D(Function):
   @staticmethod
   def forward(ctx, x, w):
     ctx.save_for_backward(x, w)
-    cout,cin,H,W = w.shape
-    ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
-    for j in range(H):
-      for i in range(W):
-        tw = w[:, :, j, i]
-        for Y in range(ret.shape[2]):
-          for X in range(ret.shape[3]):
-            ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
-    return ret
+    return conv2d_inner_forward(x, w)
 
   @staticmethod
   def backward(ctx, grad_output):
-    x, w = ctx.saved_tensors
-    dx = np.zeros_like(x)
-    dw = np.zeros_like(w)
-    cout,cin,H,W = w.shape
-    for j in range(H):
-      for i in range(W):
-        tw = w[:, :, j, i]
-        for Y in range(grad_output.shape[2]):
-          for X in range(grad_output.shape[3]):
-            gg = grad_output[:, :, Y, X]
-            tx = x[:, :, Y+j, X+i]
-            dx[:, :, Y+j, X+i] += gg.dot(tw)
-            dw[:, :, j, i] += gg.T.dot(tx)
-    return dx, dw
+    return conv2d_inner_backward(grad_output, *ctx.saved_tensors)
 register('conv2d', Conv2D)
 

From 685ce5ba7e4c0b029de973bdd6846c1b775b9754 Mon Sep 17 00:00:00 2001
From: George Hotz
Date: Wed, 21 Oct 2020 09:44:28 -0700
Subject: [PATCH 2/2] scope better and don't break tests

---
 tinygrad/tensor.py | 69 +++++++++++++++++++++++++---------------------
 1 file changed, 38 insertions(+), 31 deletions(-)

diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index c68ef70936..059e73d4ee 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -1,7 +1,12 @@
 # inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
 from functools import partialmethod
 import numpy as np
-from numba import jit, float32
+
+# optional jit
+try:
+  from numba import jit
+except ImportError:
+  jit = lambda x: x
 
 # **** start with two base classes ****
 
@@ -172,42 +177,44 @@ class LogSoftmax(Function):
 register('logsoftmax', LogSoftmax)
 
 
-@jit(nopython=True)
-def conv2d_inner_forward(x, w):
-  cout,cin,H,W = w.shape
-  ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
-  for j in range(H):
-    for i in range(W):
-      tw = w[:, :, j, i]
-      for Y in range(ret.shape[2]):
-        for X in range(ret.shape[3]):
-          ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
-  return ret
-
-@jit(nopython=True)
-def conv2d_inner_backward(grad_output, x, w):
-  dx = np.zeros_like(x)
-  dw = np.zeros_like(w)
-  cout,cin,H,W = w.shape
-  for j in range(H):
-    for i in range(W):
-      tw = w[:, :, j, i]
-      for Y in range(grad_output.shape[2]):
-        for X in range(grad_output.shape[3]):
-          gg = grad_output[:, :, Y, X]
-          tx = x[:, :, Y+j, X+i]
-          dx[:, :, Y+j, X+i] += gg.dot(tw)
-          dw[:, :, j, i] += gg.T.dot(tx)
-  return dx, dw
-
 class Conv2D(Function):
+  @staticmethod
+  @jit
+  def inner_forward(x, w):
+    cout,cin,H,W = w.shape
+    ret = np.zeros((x.shape[0], cout, x.shape[2]-(H-1), x.shape[3]-(W-1)), dtype=w.dtype)
+    for j in range(H):
+      for i in range(W):
+        tw = w[:, :, j, i]
+        for Y in range(ret.shape[2]):
+          for X in range(ret.shape[3]):
+            ret[:, :, Y, X] += x[:, :, Y+j, X+i].dot(tw.T)
+    return ret
+
+  @staticmethod
+  @jit
+  def inner_backward(grad_output, x, w):
+    dx = np.zeros_like(x)
+    dw = np.zeros_like(w)
+    cout,cin,H,W = w.shape
+    for j in range(H):
+      for i in range(W):
+        tw = w[:, :, j, i]
+        for Y in range(grad_output.shape[2]):
+          for X in range(grad_output.shape[3]):
+            gg = grad_output[:, :, Y, X]
+            tx = x[:, :, Y+j, X+i]
+            dx[:, :, Y+j, X+i] += gg.dot(tw)
+            dw[:, :, j, i] += gg.T.dot(tx)
+    return dx, dw
+
   @staticmethod
   def forward(ctx, x, w):
     ctx.save_for_backward(x, w)
-    return conv2d_inner_forward(x, w)
+    return Conv2D.inner_forward(x, w)
 
   @staticmethod
   def backward(ctx, grad_output):
-    return conv2d_inner_backward(grad_output, *ctx.saved_tensors)
+    return Conv2D.inner_backward(grad_output, *ctx.saved_tensors)
 register('conv2d', Conv2D)
 
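
A minimal, self-contained sketch (not part of either patch) of the optional-jit pattern PATCH 2 introduces: if numba is installed, the bare @jit decorator compiles the function; if the import fails, the lambda fallback leaves plain Python/NumPy in place so the tests still run. The double_elements kernel below is a hypothetical stand-in for the conv2d loops above.

import numpy as np

# same fallback as PATCH 2: use numba's jit when available, else a no-op decorator
try:
  from numba import jit
except ImportError:
  jit = lambda x: x

@jit
def double_elements(x):
  # hypothetical stand-in kernel; the patches jit the conv2d forward/backward loops instead
  out = np.zeros_like(x)
  for i in range(x.shape[0]):
    out[i] = 2 * x[i]
  return out

# prints [0. 2. 4. 6.] with or without numba installed
print(double_elements(np.arange(4, dtype=np.float32)))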