Big MNIST model with PIL augmentation and load/save (#160)

* 2serious

* load/save

* fixing GPU

* added DEBUG

* needs BatchNorm or doesn't learn anything

* old file not needed

* added conv biases

* added extra/training.py and checkpoint

* assert in test only

* save

* padding

* num_classes

* checkpoint

* checkpoints for padding

* training was broken

* merge

* rotation augmentation

* more aug

* needs testing

* streamline augment, augment is fast thus bicubic

* tidying up
This commit is contained in:
Marcel Bischoff
2020-12-13 23:45:55 -05:00
committed by GitHub
parent f50dcc12ac
commit da72a0eed4
4 changed files with 216 additions and 84 deletions

View File

@@ -1,55 +1,138 @@
#!/usr/bin/env python
# see https://github.com/Matuzas77/MNIST-0.17/blob/master/MNIST_final_solution.ipynb
#inspired by https://github.com/Matuzas77/MNIST-0.17/blob/master/MNIST_final_solution.ipynb
import os
import sys
sys.path.append(os.getcwd())
sys.path.append(os.path.join(os.getcwd(), 'test'))
from tinygrad.tensor import Tensor
import numpy as np
from tinygrad.tensor import Tensor, GPU
from tinygrad.nn import BatchNorm2D
import tinygrad.optim as optim
from extra.utils import get_parameters
from test_mnist import fetch_mnist
from extra.training import train, evaluate
import tinygrad.optim as optim
from extra.augment import augment_img
# Runtime switches taken from the environment. A switch is on as soon as the
# variable exists, whatever its value (even an empty string).
#   GPU   - parameters are moved to the GPU and training is told to use it
#   QUICK - shortened schedule, a single step per epoch, small test subset
#   DEBUG - parameters() prints every parameter shape and the total count
GPU = "GPU" in os.environ
QUICK = "QUICK" in os.environ
DEBUG = "DEBUG" in os.environ
# TODO: abstract this generic trainer out of the test
from test_mnist import train as train_on_mnist
class SqueezeExciteBlock2D:
    """Channel gating block.

    Each channel of the input is averaged over its full spatial extent, the
    resulting vector goes through two dense layers (ReLU, then sigmoid), and
    the input is multiplied channel-wise by that sigmoid output.
    """

    def __init__(self, filters):
        self.filters = filters
        # width of the bottleneck between the two dense layers
        hidden = self.filters // 32
        self.weight1 = Tensor.uniform(self.filters, hidden)
        self.bias1 = Tensor.uniform(1, hidden)
        self.weight2 = Tensor.uniform(hidden, self.filters)
        self.bias2 = Tensor.uniform(1, self.filters)

    def __call__(self, input):
        # pooling window spans dims 2 and 3 entirely, i.e. a global average pool
        full_extent = (input.shape[2], input.shape[3])
        pooled = input.avg_pool2d(kernel_size=full_extent)
        flat = pooled.reshape(shape=(-1, self.filters))
        squeezed = (flat.dot(self.weight1) + self.bias1).relu()
        gate = (squeezed.dot(self.weight2) + self.bias2).sigmoid()
        # trailing 1x1 dims let the per-channel gate broadcast over the input
        return input.mul(gate.reshape(shape=(-1, self.filters, 1, 1)))


# NOTE(review): this assignment appeared between the two methods above in the
# rendered diff; it looks like the pre-change form of the GPU flag defined
# next to the imports and evaluates to the same value. Kept verbatim.
GPU = os.getenv("GPU") is not None
class SeriousModel:
class ConvBlock:
    """Three padded convolution + ReLU layers, then batch norm and squeeze-excite.

    Args:
        h: height the input is reshaped to before the first convolution.
        w: width the input is reshaped to before the first convolution.
        inp: number of input channels.
        filters: number of output channels of each convolution (default 128).
        conv: side length of the square convolution kernels (default 3).
    """

    def __init__(self, h, w, inp, filters=128, conv=3):
        self.h, self.w = h, w
        self.inp = inp
        # init weights: the first conv maps inp -> filters, the next two filters -> filters
        self.cweights = [Tensor.uniform(filters, inp if i == 0 else filters, conv, conv) for i in range(3)]
        self.cbiases = [Tensor.uniform(1, filters, 1, 1) for _ in range(3)]
        # init layers; sized by `filters` (previously hard-coded to 128, which
        # only matched the default and broke any other filter count)
        self._bn = BatchNorm2D(filters, training=True)
        self._seb = SqueezeExciteBlock2D(filters)

    def __call__(self, input):
        """Run the block on `input` and return the gated feature map."""
        # height before width, assuming conv2d's (batch, channels, height, width)
        # layout; identical to the former (w, h) order for square inputs
        x = input.reshape(shape=(-1, self.inp, self.h, self.w))
        for cweight, cbias in zip(self.cweights, self.cbiases):
            # one unit of padding on every side; presumably keeps the spatial
            # size for the default 3x3 kernel - confirm if `conv` is changed
            x = x.pad2d(padding=[1,1,1,1]).conv2d(cweight).add(cbias).relu()
        x = self._bn(x)
        x = self._seb(x)
        return x
class BigConvNet:
# Constructor of the model: sets up the convolutional blocks and the two
# final weight matrices.
# NOTE(review): this span comes from a rendered diff without +/- markers or
# indentation; blocks / block_convs / chans look like lines of the previous
# model interleaved with the new ones - confirm against the real file.
def __init__(self):
self.blocks = 3
self.block_convs = 3
# three ConvBlock(h, w, inp) stages: 28x28 with 1 input channel,
# 28x28 with 128 input channels, 14x14 with 128 input channels
self.conv = [ConvBlock(28,28,1), ConvBlock(28,28,128), ConvBlock(14,14,128)]
# 128x10 matrices; forward() applies weight1 to the average-pooled features
# and weight2 to the max-pooled features and sums the two products
self.weight1 = Tensor.uniform(128,10)
self.weight2 = Tensor.uniform(128,10)
# TODO: raise back to 128 when it's fast
self.chans = 32
def parameters(self):
    """Return the model's parameters as collected by get_parameters(self).

    With DEBUG set, only parameters whose requires_grad is true are returned,
    and each one's shape plus the total element count is printed first.
    """
    if not DEBUG:
        return get_parameters(self)

    # debug path, kept for the moment
    trainable = [t for t in get_parameters(self) if t.requires_grad]
    total = 0
    for t in trainable:
        print(t.shape)
        total += np.prod(t.shape)
    print('no of parameters', total)
    return trainable
# NOTE(review): these assignments follow parameters() in this view but read
# like the tail of a constructor (they use self.chans, self.blocks and
# self.block_convs); presumably lines of the previous model left in place by
# the rendered diff - confirm against the real file.
# one 3x3 kernel tensor per convolution; only the first takes 1 input channel
self.convs = [Tensor.uniform(self.chans, self.chans if i > 0 else 1, 3, 3) for i in range(self.blocks * self.block_convs)]
# one bias per convolution, shaped (1, chans, 1, 1)
self.cbias = [Tensor.uniform(1, self.chans, 1, 1) for i in range(self.blocks * self.block_convs)]
# three batch-norm layers, indexed by block in the loop-based forward code
self.bn = [BatchNorm2D(self.chans, training=True) for i in range(3)]
# output matrices for the average-pooled (fc1) and max-pooled (fc2) features
self.fc1 = Tensor.uniform(self.chans, 10)
self.fc2 = Tensor.uniform(self.chans, 10)
def save(self, filename):
    """Write all parameters of the model to ``filename + '.npy'``.

    The arrays are appended to one file with np.save, in the order produced
    by get_parameters(self).
    """
    path = filename + '.npy'
    with open(path, 'wb') as out:
        for tensor in get_parameters(self):
            # every parameter is written, not only those with requires_grad
            np.save(out, tensor.cpu().data)
def load(self, filename):
    """Restore parameters from ``filename + '.npy'``.

    Arrays are read one after another with np.load and copied into the
    parameters in the order produced by get_parameters(self). Loading is best
    effort: a parameter that cannot be read or assigned is reported and
    skipped. Errors from opening the file itself still propagate.
    """
    with open(filename + '.npy', 'rb') as f:
        for par in get_parameters(self):
            # every parameter is read, not only those with requires_grad
            try:
                par.cpu().data[:] = np.load(f)
                if GPU:
                    # NOTE(review): the result is discarded here while the
                    # script moves parameters with cuda_(); confirm whether
                    # cuda_() was intended.
                    par.cuda()
            except Exception as err:
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are no longer swallowed; the cause is shown.
                print('Could not load parameter', err)
# Forward pass: maps a batch of inputs to log-softmax outputs.
# NOTE(review): two bodies are visible here because the rendered diff dropped
# its +/- markers and indentation. The first (loop over self.blocks, ending
# at the first return) uses convs/cbias/bn/fc1/fc2; the second uses
# self.conv/weight1/weight2. Read literally, the second is unreachable -
# confirm which one the real file contains.
def forward(self, x):
x = x.reshape(shape=(-1, 1, 28, 28)) # hacks
# loop variant: block_convs conv+bias+ReLU layers per block, batch norm after
# each block, 2x2 average pooling after every block except the first
for i in range(self.blocks):
for j in range(self.block_convs):
#print(i, j, x.shape, x.sum().cpu())
# TODO: should padding be used?
x = x.conv2d(self.convs[i*3+j]).add(self.cbias[i*3+j]).relu()
x = self.bn[i](x)
if i > 0:
x = x.avg_pool2d(kernel_size=(2,2))
# TODO: Add concat support to concat with max_pool2d
# pool over the whole remaining spatial extent, once by mean and once by max,
# and sum the two linear projections
x1 = x.avg_pool2d(kernel_size=x.shape[2:4]).reshape(shape=(-1, x.shape[1]))
x2 = x.max_pool2d(kernel_size=x.shape[2:4]).reshape(shape=(-1, x.shape[1]))
x = x1.dot(self.fc1) + x2.dot(self.fc2)
return x.logsoftmax()
# ConvBlock variant: two blocks, a 2x2 average pool, then the third block
x = self.conv[0](x)
x = self.conv[1](x)
x = x.avg_pool2d(kernel_size=(2,2))
x = self.conv[2](x)
# 14x14 pooling windows with 128 channels, reshaped to (-1, 128)
x1 = x.avg_pool2d(kernel_size=(14,14)).reshape(shape=(-1,128)) #global
x2 = x.max_pool2d(kernel_size=(14,14)).reshape(shape=(-1,128)) #global
xo = x1.dot(self.weight1) + x2.dot(self.weight2)
return xo.logsoftmax()
# Script entry point: builds the model, optionally loads weights given as the
# first command-line argument, then trains in several learning-rate stages
# and writes a checkpoint after every epoch.
# NOTE(review): the SeriousModel / train_on_mnist lines right below look like
# the previous version of this script left in place by the rendered diff
# (no +/- markers) - confirm against the real file.
if __name__ == "__main__":
model = SeriousModel()
params = get_parameters(model)
if GPU:
[x.cuda_() for x in params]
optimizer = optim.Adam(params, lr=0.001)
train_on_mnist(model, optimizer, steps=1875, BS=32, gpu=GPU)
# one learning rate and one epoch count per training stage (zipped below)
lrs = [1e-4, 1e-5] if QUICK else [1e-3, 1e-4, 1e-5, 1e-5]
epochss = [2, 1] if QUICK else [13, 3, 3, 1]
BS = 32
lmbd = 0.00025
# loss: mean of out*y plus lmbd times the summed absolute values of the two
# output matrices; `model` is looked up when the lambda is called, so it may
# be assigned further down
lossfn = lambda out,y: out.mul(y).mean() + lmbd*(model.weight1.abs() + model.weight2.abs()).sum()
X_train, Y_train, X_test, Y_test = fetch_mnist()
# number of full batches in the training set
steps = len(X_train)//BS
np.random.seed(1337)
if QUICK:
# smoke-test mode: one step per epoch and only the first BS test samples
steps = 1
X_test, Y_test = X_test[:BS], Y_test[:BS]
model = BigConvNet()
if len(sys.argv) > 1:
# NOTE(review): the bare except also hides failures of evaluate(), not only
# of load(), and reports both as a load problem
try:
model.load(sys.argv[1])
print('Loaded weights "'+sys.argv[1]+'", evaluating...')
evaluate(model, X_test, Y_test, BS=BS)
except:
print('could not load weights "'+sys.argv[1]+'".')
if GPU:
params = get_parameters(model)
[x.cuda_() for x in params]
# a fresh Adam optimizer is created for every stage
for lr, epochs in zip(lrs, epochss):
optimizer = optim.Adam(model.parameters(), lr=lr)
for epoch in range(1,epochs+1):
#first epoch without augmentation
# the epoch counter restarts at 1 in every stage, so each stage begins with
# one pass over the unaugmented data
X_aug = X_train if epoch == 1 else augment_img(X_train)
train(model, X_aug, Y_train, optimizer, steps=steps, lossfn=lossfn, gpu=GPU, BS=BS)
accuracy = evaluate(model, X_test, Y_test, BS=BS)
# checkpoint name carries the value returned by evaluate, scaled by 1e6 and
# rounded to an integer
model.save('examples/checkpoint'+str("%.0f" % (accuracy*1.0e6)))