tinygrad/examples/transformer.py
Jacky Lee f08187526f Fix examples (#540)
* Fix examples

* Remove training in parameters

* Simplify a bit

* Remove extra import

* Fix linter errors

* factor out Device

* NumPy-like semantics for Tensor.__getitem__ (#506)

* Rewrote Tensor.__getitem__ to fix negative indices and add support for np.newaxis/None

* Fixed pad2d

* mypy doesn't know about mlops methods

* normal python behavior for out-of-bounds slicing

* type: ignore

* inlined idxfix

* added comment for __getitem__

* Better comments, better tests, and fixed bug in np.newaxis

* update cpu and torch to hold buffers (#542)

* update cpu and torch to hold buffers

* save lines, and probably faster

* Mypy fun (#541)

* mypy fun

* things are just faster

* running fast

* mypy is fast

* compile.sh

* no gpu hack

* refactor ops_cpu and ops_torch to not subclass

* make weak buffer work

* tensor works

* fix test failing

* cpu/torch cleanups

* no or operator on dict in python 3.8

* that was junk

* fix warnings

* comment and touchup

* dyn add of math ops

* refactor ops_cpu and ops_torch to not share code

* nn/optim.py compiles now

* Reorder imports

* call mkdir only if directory doesn't exist

---------

Co-authored-by: George Hotz <geohot@gmail.com>
Co-authored-by: Mitchell Goff <mitchellgoffpc@gmail.com>
Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
2023-02-10 12:09:37 -06:00
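The #506 entries in the log above describe NumPy-like semantics for Tensor.__getitem__: fixed negative indices, support for np.newaxis/None, and plain-Python behavior for out-of-bounds slicing. A minimal sketch of that behavior, assuming the era's Tensor API (construction from a NumPy array, plus the .numpy() and .shape accessors); the values are illustrative:

    import numpy as np
    from tinygrad.tensor import Tensor

    t = Tensor(np.arange(12).reshape(3, 4).astype(np.float32))
    print(t[-1].numpy())        # negative index picks the last row: [8., 9., 10., 11.]
    print(t[None].shape)        # None (np.newaxis) inserts a leading axis: (1, 3, 4)
    print(t[0:2, 1:3].numpy())  # slicing; out-of-bounds slice ends clamp like plain Python lists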

43 lines · 1.5 KiB · Python · Executable File

#!/usr/bin/env python3
import numpy as np
import random
from tinygrad.nn.optim import Adam
from extra.utils import get_parameters
from extra.training import train, evaluate
from models.transformer import Transformer

# dataset idea from https://github.com/karpathy/minGPT/blob/master/play_math.ipynb
def make_dataset():
  ds = []
  for i in range(100):
    for j in range(100):
      s = i+j
      # digits of i, digits of j, then digits of the sum s
      ds.append([i//10, i%10, j//10, j%10, s//100, (s//10)%10, s%10])
  random.shuffle(ds)
  ds = np.array(ds)
  ds_X = ds[:, 0:6]
  ds_Y = np.copy(ds[:, 1:])  # targets: the row shifted left by one digit
  ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
  ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]
  return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test

if __name__ == "__main__":
  # vocab of 10 digits, sequence length 6, 2 layers, embed dim 128, 4 heads, ff dim 32
  model = Transformer(10, 6, 2, 128, 4, 32)
  X_train, Y_train, X_test, Y_test = make_dataset()
  lr = 0.003
  for i in range(10):
    optim = Adam(get_parameters(model), lr=lr)
    train(model, X_train, Y_train, optim, 50, BS=64)
    acc, Y_test_preds = evaluate(model, X_test, Y_test, num_classes=10, return_predict=True)
    lr /= 1.2  # decay the learning rate between rounds
    print(f'reducing lr to {lr:.4f}')
    if acc > 0.998:
      wrong = 0
      for k in range(len(Y_test_preds)):
        if (Y_test_preds[k] != Y_test[k]).any():
          wrong += 1
          a, b, c, x = X_test[k,:2], X_test[k,2:4], Y_test[k,-3:], Y_test_preds[k,-3:]
          print(f'{a[0]}{a[1]} + {b[0]}{b[1]} = {x[0]}{x[1]}{x[2]} (correct: {c[0]}{c[1]}{c[2]})')
      print(f'Wrong predictions: {wrong}, acc = {acc:.4f}')
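
For concreteness, a worked example of one dataset row (a sketch, not part of the file): each row holds the digits of two 2-digit addends followed by the digits of their 3-digit sum, and the targets are the same row shifted left by one so the model learns next-digit prediction.

    row = [1, 2, 3, 4, 0, 4, 6]  # i=12, j=34, s=46 -> [i//10, i%10, j//10, j%10, s//100, (s//10)%10, s%10]
    X = row[0:6]                 # [1, 2, 3, 4, 0, 4]  (ds_X: everything but the last digit)
    Y = row[1:]                  # [2, 3, 4, 0, 4, 6]  (ds_Y: everything but the first digit)

Since the file carries a shebang and is marked executable, it can be run directly as ./examples/transformer.py (or via python3).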