From 1b3b8de5e2575d3c51b80bba012cb36a77c8fe5c Mon Sep 17 00:00:00 2001
From: George Hotz
Date: Thu, 23 Nov 2023 14:54:52 -0800
Subject: [PATCH] update readme examples

---
 README.md | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 482f17f122..332993bff7 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ tinygrad can run [LLaMA](/docs/showcase.md#llama) and [Stable Diffusion](/docs/s
 Try a matmul. See how, despite the style, it is fused into one kernel with the power of laziness.
 
 ```sh
-DEBUG=3 python3 -c "from tinygrad.tensor import Tensor;
+DEBUG=3 python3 -c "from tinygrad import Tensor;
 N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N);
 c = (a.reshape(N, 1, N) * b.permute(1,0).reshape(1, N, N)).sum(axis=2);
 print((c.numpy() - (a.numpy() @ b.numpy())).mean())"
@@ -48,30 +48,28 @@ And we can change `DEBUG` to `4` to see the generated code.
 As it turns out, 90% of what you need for neural networks are a decent autograd/tensor library.
 Throw in an optimizer, a data loader, and some compute, and you have all you need.
 
-#### Neural network example (from test/models/test_mnist.py)
+#### Neural network example (see examples/beautiful_mnist.py for the full thing)
 
 ```py
-from tinygrad.tensor import Tensor
-import tinygrad.nn.optim as optim
+from tinygrad import Tensor, nn
 
-class TinyBobNet:
+class LinearNet:
   def __init__(self):
-    self.l1 = Tensor.uniform(784, 128)
-    self.l2 = Tensor.uniform(128, 10)
+    self.l1 = Tensor.kaiming_uniform(784, 128)
+    self.l2 = Tensor.kaiming_uniform(128, 10)
+  def __call__(self, x:Tensor) -> Tensor:
+    return x.flatten(1).dot(self.l1).relu().dot(self.l2)
 
-  def forward(self, x):
-    return x.dot(self.l1).relu().dot(self.l2).log_softmax()
+model = LinearNet()
+optim = nn.optim.Adam([model.l1, model.l2], lr=0.001)
 
-model = TinyBobNet()
-optim = optim.SGD([model.l1, model.l2], lr=0.001)
+x, y = Tensor.rand(4, 1, 28, 28), Tensor([2,4,3,7]) # replace with real mnist dataloader
 
-# ... complete data loader here
-
-out = model.forward(x)
-loss = out.mul(y).mean()
-optim.zero_grad()
-loss.backward()
-optim.step()
+for i in range(10):
+  optim.zero_grad()
+  loss = model(x).sparse_categorical_crossentropy(y).backward()
+  optim.step()
+  print(i, loss.item())
 ```
 
 ## Accelerators
@@ -112,7 +110,7 @@ Documentation along with a quick start guide can be found in the [docs/](/docs)
 ### Quick example comparing to PyTorch
 
 ```py
-from tinygrad.tensor import Tensor
+from tinygrad import Tensor
 x = Tensor.eye(3, requires_grad=True)
 y = Tensor([[2.0,0,-2.0]], requires_grad=True)
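# A note on the fused matmul example above: the broadcast expression
#   (a.reshape(N, 1, N) * b.permute(1,0).reshape(1, N, N)).sum(axis=2)
# multiplies row i of a against column j of b elementwise and reduces, i.e. it is
# exactly a @ b; per the README text, tinygrad's laziness fuses it into one kernel
# instead of materializing the N x N x N intermediate. Below is a minimal numpy-only
# sketch of the same identity (the small N and variable names are just for illustration,
# not part of the patch):
import numpy as np

N = 64
a, b = np.random.rand(N, N).astype(np.float32), np.random.rand(N, N).astype(np.float32)
# b.T plays the role of permute(1,0); summing over the last axis contracts the shared index k
c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2)
print(np.abs(c - a @ b).max())  # ~1e-5 for float32, confirming c[i, j] == sum_k a[i, k] * b[k, j]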