diff --git a/README.md b/README.md
index 6fd542c0ed..1e1a65761f 100644
--- a/README.md
+++ b/README.md
@@ -41,11 +41,11 @@ print(y.grad)  # dz/dy
 
 It turns out, a decent autograd tensor library is 90% of what you need for neural networks. Add an optimizer (SGD and Adam implemented) from tinygrad.optim, write some boilerplate minibatching code, and you have all you need.
 
-### Neural network example (from test/mnist.py)
+### Neural network example (from test/test_mnist.py)
 
 ```python
 from tinygrad.tensor import Tensor
-import tinygrad.optim as optim
+import tinygrad.optim as tinygrad_optim
 from tinygrad.utils import layer_init_uniform
 
 class TinyBobNet:
@@ -57,7 +57,8 @@ class TinyBobNet:
     return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
 
 model = TinyBobNet()
-optim = optim.SGD([model.l1, model.l2], lr=0.001)
+optim = tinygrad_optim.SGD([model.l1, model.l2], lr=0.001)  # or
+optim = tinygrad_optim.RMSprop([model.l1, model.l2], lr=0.001)
 
 # ... and complete like pytorch, with (x,y) data
 
diff --git a/test/test_mnist.py b/test/test_mnist.py
index 59b5065991..1e60a4c058 100644
--- a/test/test_mnist.py
+++ b/test/test_mnist.py
@@ -36,57 +36,64 @@ class TinyConvNet:
 
 class TestMNIST(unittest.TestCase):
-  def test_mnist(self):
+  def train(model, optim, steps, BS=128):
+    losses, accuracies = [], []
+    for i in (t := trange(steps)):
+      samp = np.random.randint(0, X_train.shape[0], size=(BS))
+
+      x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32))
+      Y = Y_train[samp]
+      y = np.zeros((len(samp),10), np.float32)
+      # correct loss for NLL, torch NLL loss returns one per row
+      y[range(y.shape[0]),Y] = -10.0
+      y = Tensor(y)
+
+      # network
+      out = model.forward(x)
+
+      # NLL loss function
+      loss = out.mul(y).mean()
+      loss.backward()
+      optim.step()
+
+      cat = np.argmax(out.data, axis=1)
+      accuracy = (cat == Y).mean()
+
+      # printing
+      loss = loss.data
+      losses.append(loss)
+      accuracies.append(accuracy)
+      t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
+
+  def evaluate(model):
+    def numpy_eval():
+      Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28)).astype(np.float32)))
+      Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)
+      return (Y_test == Y_test_preds).mean()
+
+    accuracy = numpy_eval()
+    print("test set accuracy is %f" % accuracy)
+    assert accuracy > 0.95
+
+  # models
   if os.getenv("CONV") == "1":
     model = TinyConvNet()
     optim = tinygrad_optim.Adam([model.c1, model.l1, model.l2], lr=0.001)
     steps = 400
+    train(model, optim, steps)
+    evaluate(model)
   else:
     model = TinyBobNet()
-    # optim = tinygrad_optim.SGD([model.l1, model.l2], lr=0.001)
-    optim = tinygrad_optim.RMSprop([model.l1, model.l2], lr=0.001)
+    optim = tinygrad_optim.SGD([model.l1, model.l2], lr=0.001)
     steps = 1000
-
-  BS = 128
-  losses, accuracies = [], []
-  for i in (t := trange(steps)):
-    samp = np.random.randint(0, X_train.shape[0], size=(BS))
+    train(model, optim, steps)
+    evaluate(model)
 
-    x = Tensor(X_train[samp].reshape((-1, 28*28)).astype(np.float32))
-    Y = Y_train[samp]
-    y = np.zeros((len(samp),10), np.float32)
-    # correct loss for NLL, torch NLL loss returns one per row
-    y[range(y.shape[0]),Y] = -10.0
-    y = Tensor(y)
-
-    # network
-    out = model.forward(x)
-
-    # NLL loss function
-    loss = out.mul(y).mean()
-    loss.backward()
-    optim.step()
-
-    cat = np.argmax(out.data, axis=1)
-    accuracy = (cat == Y).mean()
-
-    # printing
-    loss = loss.data
-    losses.append(loss)
-    accuracies.append(accuracy)
-    t.set_description("loss %.2f accuracy %.2f" % (loss, accuracy))
-
-  # evaluate
-  def numpy_eval():
-    Y_test_preds_out = model.forward(Tensor(X_test.reshape((-1, 28*28)).astype(np.float32)))
-    Y_test_preds = np.argmax(Y_test_preds_out.data, axis=1)
-    return (Y_test == Y_test_preds).mean()
-
-  accuracy = numpy_eval()
-  print("test set accuracy is %f" % accuracy)
-  assert accuracy > 0.95
-
+    # RMSprop
+    optim = tinygrad_optim.RMSprop([model.l1, model.l2], lr=0.001)
+    train(model, optim, steps)
+    evaluate(model)
 
 if __name__ == '__main__':
   unittest.main()
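For reference, the refactor above factors the old monolithic test into the `train()` and `evaluate()` helpers that the README example now points at. Below is a minimal, self-contained sketch of the minibatch loop those helpers encapsulate, written against the same calls the README and test use (`Tensor`, `tinygrad.optim`, `layer_init_uniform`). It is illustrative only: the random arrays stand in for the MNIST data that test/test_mnist.py actually loads, and the TinyBobNet init assumes the `layer_init_uniform` weights the README describes.

```python
# Illustrative sketch -- not part of the diff. Random data stands in for MNIST;
# shapes and API calls follow the README / test_mnist.py example.
import numpy as np
from tinygrad.tensor import Tensor
import tinygrad.optim as tinygrad_optim
from tinygrad.utils import layer_init_uniform

class TinyBobNet:
  def __init__(self):
    # 784 -> 128 -> 10, as in the README example (assumed init)
    self.l1 = Tensor(layer_init_uniform(784, 128))
    self.l2 = Tensor(layer_init_uniform(128, 10))
  def forward(self, x):
    return x.dot(self.l1).relu().dot(self.l2).logsoftmax()

# stand-in data: 1024 fake "images" and labels instead of the real MNIST arrays
X_train = np.random.rand(1024, 28*28).astype(np.float32)
Y_train = np.random.randint(0, 10, size=(1024,))

model = TinyBobNet()
optim = tinygrad_optim.SGD([model.l1, model.l2], lr=0.001)

BS, steps = 128, 10
for _ in range(steps):
  samp = np.random.randint(0, X_train.shape[0], size=(BS))
  x = Tensor(X_train[samp])
  # scaled one-hot targets so mul().mean() acts as an NLL loss, as in the test
  y = np.zeros((BS, 10), np.float32)
  y[range(BS), Y_train[samp]] = -10.0
  y = Tensor(y)

  out = model.forward(x)    # logsoftmax outputs
  loss = out.mul(y).mean()  # NLL loss
  loss.backward()
  optim.step()
  print(loss.data)
```

Swapping `SGD` for `RMSprop` (or `Adam`, as the CONV path does) only changes the optimizer construction line; the loop itself is what `train()` now owns.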