mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-04-07 03:00:26 -04:00
batchnorm work
This commit is contained in:
@@ -10,7 +10,7 @@ import time
|
||||
import numpy as np
|
||||
np.set_printoptions(suppress=True)
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad.utils import fetch
|
||||
from tinygrad.utils import fetch, get_parameters
|
||||
from extra.efficientnet import EfficientNet
|
||||
|
||||
def infer(model, img):
|
||||
@@ -53,7 +53,9 @@ def infer(model, img):
|
||||
if __name__ == "__main__":
|
||||
# instantiate my net
|
||||
model = EfficientNet(int(os.getenv("NUM", "0")))
|
||||
model.load_weights_from_torch(GPU)
|
||||
model.load_weights_from_torch()
|
||||
if GPU:
|
||||
[x.cuda_() for x in get_parameters(model)]
|
||||
|
||||
# category labels
|
||||
import ast
|
||||
|
||||
@@ -41,6 +41,8 @@ if __name__ == "__main__":
|
||||
model = TinyConvNet(classes)
|
||||
else:
|
||||
model = EfficientNet(int(os.getenv("NUM", "0")), classes, has_se=False)
|
||||
#model = EfficientNet(int(os.getenv("NUM", "0")), classes, has_se=True)
|
||||
#model.load_weights_from_torch()
|
||||
|
||||
parameters = get_parameters(model)
|
||||
print("parameters", len(parameters))
|
||||
@@ -74,13 +76,14 @@ if __name__ == "__main__":
|
||||
optimizer.step()
|
||||
opt_time = (time.time()-st)*1000.0
|
||||
|
||||
#print(out.cpu().data)
|
||||
|
||||
st = time.time()
|
||||
loss = loss.cpu().data
|
||||
cat = np.argmax(out.cpu().data, axis=1)
|
||||
accuracy = (cat == Y).mean()
|
||||
finish_time = (time.time()-st)*1000.0
|
||||
|
||||
|
||||
# printing
|
||||
t.set_description("loss %.2f accuracy %.2f -- %.2f + %.2f + %.2f + %.2f = %.2f -- %d" %
|
||||
(loss, accuracy,
|
||||
|
||||
@@ -180,8 +180,8 @@ class EfficientNet:
|
||||
def forward(self, x):
|
||||
x = x.pad2d(padding=(0,1,0,1))
|
||||
x = self._bn0(x.conv2d(self._conv_stem, stride=2)).swish()
|
||||
#print(x.shape, x.data[:, 0, 0, 0])
|
||||
for block in self._blocks:
|
||||
#print(x.shape)
|
||||
x = block(x)
|
||||
x = self._bn1(x.conv2d(self._conv_head)).swish()
|
||||
x = x.avg_pool2d(kernel_size=x.shape[2:4])
|
||||
@@ -189,7 +189,7 @@ class EfficientNet:
|
||||
#x = x.dropout(0.2)
|
||||
return x.dot(self._fc).add(self._fc_bias.reshape(shape=[1,-1]))
|
||||
|
||||
def load_weights_from_torch(self, gpu):
|
||||
def load_weights_from_torch(self):
|
||||
# load b0
|
||||
# https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/utils.py#L551
|
||||
if self.number == 0:
|
||||
@@ -223,7 +223,10 @@ class EfficientNet:
|
||||
except AttributeError:
|
||||
mv = eval(mk.replace(".bias", "_bias"))
|
||||
vnp = v.numpy().astype(np.float32) if USE_TORCH else v
|
||||
mv.data[:] = vnp if k != '_fc.weight' else vnp.T
|
||||
if gpu:
|
||||
mv.cuda_()
|
||||
vnp = vnp if k != '_fc.weight' else vnp.T
|
||||
|
||||
if mv.shape == vnp.shape or vnp.shape == ():
|
||||
mv.data[:] = vnp
|
||||
else:
|
||||
print("MISMATCH SHAPE IN %s, %r %r" % (k, mv.shape, vnp.shape))
|
||||
|
||||
|
||||
@@ -2,20 +2,25 @@ from tinygrad.tensor import Tensor
|
||||
|
||||
class BatchNorm2D:
|
||||
def __init__(self, sz, eps=0.001):
|
||||
self.eps = eps
|
||||
self.eps = Tensor([eps], requires_grad=False)
|
||||
self.two = Tensor([2], requires_grad=False)
|
||||
self.weight = Tensor.ones(sz)
|
||||
self.bias = Tensor.zeros(sz)
|
||||
|
||||
# TODO: need running_mean and running_var
|
||||
self.running_mean = Tensor.zeros(sz)
|
||||
self.running_var = Tensor.ones(sz)
|
||||
self.running_mean = Tensor.zeros(sz, requires_grad=False)
|
||||
self.running_var = Tensor.ones(sz, requires_grad=False)
|
||||
self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
|
||||
|
||||
def __call__(self, x):
|
||||
# TODO: use tinyops for this
|
||||
# mean op needs to support the axis argument before we can do this
|
||||
#self.running_mean.data = x.data.mean(axis=(0,2,3))
|
||||
#self.running_var.data = ((x - self.running_mean.reshape(shape=[1, -1, 1, 1]))**self.two).data.mean(axis=(0,2,3))
|
||||
|
||||
# this work at inference?
|
||||
x = x.sub(self.running_mean.reshape(shape=[1, -1, 1, 1]))
|
||||
x = x.mul(self.weight.reshape(shape=[1, -1, 1, 1]))
|
||||
x = x.div(self.running_var.add(Tensor([self.eps], gpu=x.gpu)).reshape(shape=[1, -1, 1, 1]).sqrt())
|
||||
x = x.div(self.running_var.add(self.eps).reshape(shape=[1, -1, 1, 1]).sqrt())
|
||||
x = x.add(self.bias.reshape(shape=[1, -1, 1, 1]))
|
||||
return x
|
||||
|
||||
|
||||
@@ -195,23 +195,23 @@ class Tensor:
|
||||
# ***** non first class ops *****
|
||||
|
||||
def mean(self):
|
||||
div = Tensor(np.array([1/np.prod(self.shape)], dtype=self.dtype), gpu=self.gpu)
|
||||
div = Tensor(np.array([1/np.prod(self.shape)], dtype=self.dtype), gpu=self.gpu, requires_grad=False)
|
||||
return self.sum().mul(div)
|
||||
|
||||
def sqrt(self):
|
||||
root = Tensor(np.zeros(self.shape, dtype=self.dtype)+0.5, gpu=self.gpu)
|
||||
root = Tensor(np.zeros(self.shape, dtype=self.dtype)+0.5, gpu=self.gpu, requires_grad=False)
|
||||
return self.pow(root)
|
||||
|
||||
def div(self, y):
|
||||
root = Tensor(np.zeros(self.shape, dtype=self.dtype)-1, gpu=self.gpu)
|
||||
root = Tensor(np.zeros(self.shape, dtype=self.dtype)-1, gpu=self.gpu, requires_grad=False)
|
||||
return self.mul(y.pow(root))
|
||||
|
||||
def swish(self):
|
||||
return self.mul(self.sigmoid())
|
||||
|
||||
def tanh(self):
|
||||
t2 = Tensor(np.zeros(self.shape, dtype=self.dtype)+2, gpu=self.gpu)
|
||||
t1 = Tensor(np.zeros(self.shape, dtype=self.dtype)+1, gpu=self.gpu)
|
||||
t2 = Tensor(np.zeros(self.shape, dtype=self.dtype)+2, gpu=self.gpu, requires_grad=False)
|
||||
t1 = Tensor(np.zeros(self.shape, dtype=self.dtype)+1, gpu=self.gpu, requires_grad=False)
|
||||
return self.mul(t2).sigmoid().mul(t2) - t1 # 2*sigmoid(2*x)-1
|
||||
|
||||
# An instantiation of the Function is the Context
|
||||
@@ -251,7 +251,7 @@ def register(name, fxn, gpu=False):
|
||||
f.cl_ctx, f.cl_queue = cl_ctx, cl_queue
|
||||
return f.apply(f, *x, **kwargs)
|
||||
setattr(Tensor, name, dispatch)
|
||||
if name in ['add', 'sub', 'mul', 'div']:
|
||||
if name in ['add', 'sub', 'mul', 'div', 'pow']:
|
||||
setattr(Tensor, "__%s__" % name, dispatch)
|
||||
setattr(Tensor, "__i%s__" % name, lambda self,x: self.assign(dispatch(self,x)))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user