diff --git a/README.md b/README.md
index 1e1a65761f..64e8b8f762 100644
--- a/README.md
+++ b/README.md
@@ -39,13 +39,13 @@ print(y.grad) # dz/dy
 
 ### Neural networks?
 
-It turns out, a decent autograd tensor library is 90% of what you need for neural networks. Add an optimizer (SGD and Adam implemented) from tinygrad.optim, write some boilerplate minibatching code, and you have all you need.
+It turns out, a decent autograd tensor library is 90% of what you need for neural networks. Add an optimizer (SGD, RMSprop and Adam implemented) from tinygrad.optim, write some boilerplate minibatching code, and you have all you need.
 
 ### Neural network example (from test/test_mnist.py)
 
 ```python
 from tinygrad.tensor import Tensor
-import tinygrad.optim as tinygrad_optim
+import tinygrad.optim as optim
 from tinygrad.utils import layer_init_uniform
 
 class TinyBobNet:
@@ -57,8 +57,7 @@ class TinyBobNet:
     return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
 
 model = TinyBobNet()
-optim = tinygrad_optim.SGD([model.l1, model.l2], lr=0.001) # or
-optim = tinygrad_optim.RMSprop([model.l1, model.l2], lr=0.001)
+optim = optim.SGD([model.l1, model.l2], lr=0.001)
 
 # ... and complete like pytorch, with (x,y) data
 
diff --git a/test/test_mnist.py b/test/test_mnist.py
index 1a01132745..1f49a467cf 100644
--- a/test/test_mnist.py
+++ b/test/test_mnist.py
@@ -4,7 +4,7 @@ import unittest
 import numpy as np
 from tinygrad.tensor import Tensor
 from tinygrad.utils import layer_init_uniform, fetch_mnist
-import tinygrad.optim as tinygrad_optim
+import tinygrad.optim as optim
 from tqdm import trange
 
 np.random.seed(1337)
@@ -78,20 +78,20 @@ class TestMNIST(unittest.TestCase):
 
     # models
     model = TinyConvNet()
-    optim = tinygrad_optim.Adam([model.c1, model.l1, model.l2], lr=0.001)
+    optimizer = optim.Adam([model.c1, model.l1, model.l2], lr=0.001)
     steps = 400
-    train(model, optim, steps)
+    train(model, optimizer, steps)
     evaluate(model)
 
     model = TinyBobNet()
-    optim = tinygrad_optim.SGD([model.l1, model.l2], lr=0.001)
     steps = 1000
-    train(model, optim, steps)
+    optimizer = optim.SGD([model.l1, model.l2], lr=0.001)
+    train(model, optimizer, steps)
     evaluate(model)
 
-    # RMSprop
-    optim = tinygrad_optim.RMSprop([model.l1, model.l2], lr=0.001)
-    train(model, optim, steps)
+    model = TinyBobNet()
+    optimizer = optim.RMSprop([model.l1, model.l2], lr=0.001)
+    train(model, optimizer, steps)
     evaluate(model)
 
 if __name__ == '__main__':
diff --git a/tinygrad/optim.py b/tinygrad/optim.py
index 6e22190856..d2465c4015 100644
--- a/tinygrad/optim.py
+++ b/tinygrad/optim.py
@@ -37,7 +37,7 @@ class Adam(Optimizer):
 
 # fill the 20% uncertainty of the above optim
 class RMSprop(Optimizer):
-  def __init__(self, params, lr=0.001, decay=0.9, eps=1e-6):
+  def __init__(self, params, lr=0.001, decay=0.9, eps=1e-8):
     super(RMSprop, self).__init__(params)
     self.lr = lr
     self.decay = decay
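
For reference, below is a minimal numpy sketch of the RMSprop update that the `decay=0.9` / new `eps=1e-8` defaults in tinygrad/optim.py feed into. It is an illustration under stated assumptions, not tinygrad's actual `step()` implementation; `rmsprop_step` and `cache` are hypothetical names introduced here.

```python
# Hedged sketch of the RMSprop update rule (not tinygrad's Optimizer code).
# Defaults mirror the diff above: lr=0.001, decay=0.9, eps=1e-8.
import numpy as np

def rmsprop_step(param, grad, cache, lr=0.001, decay=0.9, eps=1e-8):
  # keep an exponentially decayed running average of squared gradients
  cache = decay * cache + (1.0 - decay) * grad ** 2
  # scale the step by the root of that average; eps guards against division by zero
  param = param - lr * grad / (np.sqrt(cache) + eps)
  return param, cache

# toy usage: a single parameter vector, a few gradient steps on sum(w**2)
w = np.array([1.0, -2.0, 3.0])
cache = np.zeros_like(w)
for _ in range(5):
  g = 2.0 * w  # gradient of sum(w**2)
  w, cache = rmsprop_step(w, g, cache)
```

A smaller eps only changes behavior when the running average of squared gradients is itself tiny; otherwise the update is dominated by `lr * grad / sqrt(cache)`.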