batchnorm work

This commit is contained in:
George Hotz
2020-12-06 14:40:07 -08:00
parent da514c2918
commit 00312b8ad1
5 changed files with 32 additions and 19 deletions

View File

@@ -10,7 +10,7 @@ import time
import numpy as np
np.set_printoptions(suppress=True)
from tinygrad.tensor import Tensor
from tinygrad.utils import fetch
from tinygrad.utils import fetch, get_parameters
from extra.efficientnet import EfficientNet
def infer(model, img):
@@ -53,7 +53,9 @@ def infer(model, img):
if __name__ == "__main__":
# instantiate my net
model = EfficientNet(int(os.getenv("NUM", "0")))
model.load_weights_from_torch(GPU)
model.load_weights_from_torch()
if GPU:
[x.cuda_() for x in get_parameters(model)]
# category labels
import ast

View File

@@ -41,6 +41,8 @@ if __name__ == "__main__":
model = TinyConvNet(classes)
else:
model = EfficientNet(int(os.getenv("NUM", "0")), classes, has_se=False)
#model = EfficientNet(int(os.getenv("NUM", "0")), classes, has_se=True)
#model.load_weights_from_torch()
parameters = get_parameters(model)
print("parameters", len(parameters))
@@ -74,13 +76,14 @@ if __name__ == "__main__":
optimizer.step()
opt_time = (time.time()-st)*1000.0
#print(out.cpu().data)
st = time.time()
loss = loss.cpu().data
cat = np.argmax(out.cpu().data, axis=1)
accuracy = (cat == Y).mean()
finish_time = (time.time()-st)*1000.0
# printing
t.set_description("loss %.2f accuracy %.2f -- %.2f + %.2f + %.2f + %.2f = %.2f -- %d" %
(loss, accuracy,

View File

@@ -180,8 +180,8 @@ class EfficientNet:
def forward(self, x):
x = x.pad2d(padding=(0,1,0,1))
x = self._bn0(x.conv2d(self._conv_stem, stride=2)).swish()
#print(x.shape, x.data[:, 0, 0, 0])
for block in self._blocks:
#print(x.shape)
x = block(x)
x = self._bn1(x.conv2d(self._conv_head)).swish()
x = x.avg_pool2d(kernel_size=x.shape[2:4])
@@ -189,7 +189,7 @@ class EfficientNet:
#x = x.dropout(0.2)
return x.dot(self._fc).add(self._fc_bias.reshape(shape=[1,-1]))
def load_weights_from_torch(self, gpu):
def load_weights_from_torch(self):
# load b0
# https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/utils.py#L551
if self.number == 0:
@@ -223,7 +223,10 @@ class EfficientNet:
except AttributeError:
mv = eval(mk.replace(".bias", "_bias"))
vnp = v.numpy().astype(np.float32) if USE_TORCH else v
mv.data[:] = vnp if k != '_fc.weight' else vnp.T
if gpu:
mv.cuda_()
vnp = vnp if k != '_fc.weight' else vnp.T
if mv.shape == vnp.shape or vnp.shape == ():
mv.data[:] = vnp
else:
print("MISMATCH SHAPE IN %s, %r %r" % (k, mv.shape, vnp.shape))

View File

@@ -2,20 +2,25 @@ from tinygrad.tensor import Tensor
class BatchNorm2D:
def __init__(self, sz, eps=0.001):
self.eps = eps
self.eps = Tensor([eps], requires_grad=False)
self.two = Tensor([2], requires_grad=False)
self.weight = Tensor.ones(sz)
self.bias = Tensor.zeros(sz)
# TODO: need running_mean and running_var
self.running_mean = Tensor.zeros(sz)
self.running_var = Tensor.ones(sz)
self.running_mean = Tensor.zeros(sz, requires_grad=False)
self.running_var = Tensor.ones(sz, requires_grad=False)
self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
def __call__(self, x):
# TODO: use tinyops for this
# mean op needs to support the axis argument before we can do this
#self.running_mean.data = x.data.mean(axis=(0,2,3))
#self.running_var.data = ((x - self.running_mean.reshape(shape=[1, -1, 1, 1]))**self.two).data.mean(axis=(0,2,3))
# does this work at inference?
x = x.sub(self.running_mean.reshape(shape=[1, -1, 1, 1]))
x = x.mul(self.weight.reshape(shape=[1, -1, 1, 1]))
x = x.div(self.running_var.add(Tensor([self.eps], gpu=x.gpu)).reshape(shape=[1, -1, 1, 1]).sqrt())
x = x.div(self.running_var.add(self.eps).reshape(shape=[1, -1, 1, 1]).sqrt())
x = x.add(self.bias.reshape(shape=[1, -1, 1, 1]))
return x

View File

@@ -195,23 +195,23 @@ class Tensor:
# ***** non first class ops *****
def mean(self):
div = Tensor(np.array([1/np.prod(self.shape)], dtype=self.dtype), gpu=self.gpu)
div = Tensor(np.array([1/np.prod(self.shape)], dtype=self.dtype), gpu=self.gpu, requires_grad=False)
return self.sum().mul(div)
def sqrt(self):
root = Tensor(np.zeros(self.shape, dtype=self.dtype)+0.5, gpu=self.gpu)
root = Tensor(np.zeros(self.shape, dtype=self.dtype)+0.5, gpu=self.gpu, requires_grad=False)
return self.pow(root)
def div(self, y):
root = Tensor(np.zeros(self.shape, dtype=self.dtype)-1, gpu=self.gpu)
root = Tensor(np.zeros(self.shape, dtype=self.dtype)-1, gpu=self.gpu, requires_grad=False)
return self.mul(y.pow(root))
def swish(self):
return self.mul(self.sigmoid())
def tanh(self):
t2 = Tensor(np.zeros(self.shape, dtype=self.dtype)+2, gpu=self.gpu)
t1 = Tensor(np.zeros(self.shape, dtype=self.dtype)+1, gpu=self.gpu)
t2 = Tensor(np.zeros(self.shape, dtype=self.dtype)+2, gpu=self.gpu, requires_grad=False)
t1 = Tensor(np.zeros(self.shape, dtype=self.dtype)+1, gpu=self.gpu, requires_grad=False)
return self.mul(t2).sigmoid().mul(t2) - t1 # 2*sigmoid(2*x)-1
# An instantiation of the Function is the Context
@@ -251,7 +251,7 @@ def register(name, fxn, gpu=False):
f.cl_ctx, f.cl_queue = cl_ctx, cl_queue
return f.apply(f, *x, **kwargs)
setattr(Tensor, name, dispatch)
if name in ['add', 'sub', 'mul', 'div']:
if name in ['add', 'sub', 'mul', 'div', 'pow']:
setattr(Tensor, "__%s__" % name, dispatch)
setattr(Tensor, "__i%s__" % name, lambda self,x: self.assign(dispatch(self,x)))