diff --git a/README.md b/README.md
index 73184d341a..c8e5ff4515 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ print(x.grad)  # dz/dx
 print(y.grad)  # dz/dy
 ```
 
-### Neural networks?
+## Neural networks?
 
 It turns out, a decent autograd tensor library is 90% of what you need for neural networks. Add an optimizer (SGD, RMSprop, and Adam implemented) from tinygrad.optim, write some boilerplate minibatching code, and you have all you need.
@@ -78,7 +78,7 @@ loss.backward()
 optim.step()
 ```
 
-### GPU Support?!
+## GPU Support
 
 tinygrad supports GPUs through PyOpenCL.
@@ -87,7 +87,7 @@ from tinygrad.tensor import Tensor
 (Tensor.ones(4,4).cuda() + Tensor.ones(4,4).cuda()).cpu()
 ```
 
-### ANE Support?!?!
+### ANE Support?!
 
 If all you want to do is ReLU, you are in luck! You can do very fast ReLU (at least 30 MEGAReLUs/sec confirmed)
@@ -103,7 +103,18 @@ print(b.cpu())
 
 Warning: do not rely on the ANE port. It segfaults sometimes. So if you were doing something important with tinygrad and wanted to use the ANE, you might have a bad time.
 
-### ImageNet inference
+### Adding an accelerator
+
+You need to support 14 basic ops:
+
+```
+Add, Sub, Mul, Pow, Sum, Dot
+Pad2D, Reshape
+Relu, Sigmoid, LogSoftmax
+Conv2D, MaxPool2D, AvgPool2D
+```
+
+## ImageNet inference
 
 Despite being tiny, tinygrad supports the full EfficientNet. Pass in a picture to discover what it is.
@@ -129,7 +140,7 @@ See `examples/mnist_gan.py`
 
-### The promise of small
+## The promise of small
 
 tinygrad will always be below 1000 lines. If it isn't, we will revert commits until tinygrad becomes smaller.
@@ -142,9 +153,6 @@ python3 -m pytest
 ### TODO
 
 * Train an EfficientNet on ImageNet
-  * Make broadcasting work on the backward pass (simple please)
-  * EfficientNet backward pass
-  * Tensors on GPU (a few more backward)
 * Add a language model. BERT?
 * Add a detection model. EfficientDet?
 * Reduce code
diff --git a/tinygrad/ops_cpu.py b/tinygrad/ops_cpu.py
index de397ef29b..16e094c55e 100644
--- a/tinygrad/ops_cpu.py
+++ b/tinygrad/ops_cpu.py
@@ -86,8 +86,6 @@ class Dot(Function):
     grad_weight = input.T.dot(grad_output)
     return grad_input, grad_weight
 register('dot', Dot)
-register('matmul', Dot)
-
 
 # ************* simple ops *************
 
diff --git a/tinygrad/ops_gpu.py b/tinygrad/ops_gpu.py
index 895fc75962..9c7399269d 100644
--- a/tinygrad/ops_gpu.py
+++ b/tinygrad/ops_gpu.py
@@ -290,7 +290,6 @@ class Dot(Function):
     return grad_input, grad_weight
 register('dot', Dot, device=Tensor.GPU)
-register('matmul', Dot, device=Tensor.GPU)
 
 # ************* simple ops *************
 
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 64d6a69d73..386f072a11 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -212,6 +212,9 @@ class Tensor:
 
   # ***** non first class ops *****
 
+  def matmul(self, w):
+    return self.dot(w)
+
   def mean(self, axis=None):
     out = self.sum(axis=axis)
     coeff = np.prod(out.shape)/np.prod(self.shape)
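Net effect on the public API: `matmul` is no longer registered as a separate op on the CPU or GPU backends; it is now a plain `Tensor` method that forwards to `dot`, so existing `matmul` call sites keep working and gradients still flow through `Dot.backward`. It also keeps the registered op surface down to the 14 basic ops listed in the new "Adding an accelerator" section. A minimal usage sketch, not part of the diff, assuming only the `Tensor.ones`, `sum`, and `backward` API already shown in the README hunks above:

```python
from tinygrad.tensor import Tensor

x = Tensor.ones(3, 3)
y = Tensor.ones(1, 3)

# matmul is now just an alias for dot (see the tensor.py hunk above)
z = y.matmul(x).sum()   # equivalent to y.dot(x).sum()
z.backward()

print(x.grad)  # dz/dx, computed by Dot.backward
print(y.grad)  # dz/dy
```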