diff --git a/extra/efficientnet.py b/extra/efficientnet.py
index 46c8b514fd..ba9561537e 100644
--- a/extra/efficientnet.py
+++ b/extra/efficientnet.py
@@ -73,7 +73,7 @@ class MBConvBlock:
   def __init__(self, kernel_size, strides, expand_ratio, input_filters, output_filters, se_ratio, has_se):
     oup = expand_ratio * input_filters
     if expand_ratio != 1:
-      self._expand_conv = Tensor.zeros(oup, input_filters, 1, 1)
+      self._expand_conv = Tensor.uniform(oup, input_filters, 1, 1)
       self._bn0 = BatchNorm2D(oup)
     else:
       self._expand_conv = None
@@ -84,18 +84,18 @@ class MBConvBlock:
     else:
       self.pad = [(kernel_size-1)//2]*4
 
-    self._depthwise_conv = Tensor.zeros(oup, 1, kernel_size, kernel_size)
+    self._depthwise_conv = Tensor.uniform(oup, 1, kernel_size, kernel_size)
     self._bn1 = BatchNorm2D(oup)
 
     self.has_se = has_se
     if self.has_se:
       num_squeezed_channels = max(1, int(input_filters * se_ratio))
-      self._se_reduce = Tensor.zeros(num_squeezed_channels, oup, 1, 1)
+      self._se_reduce = Tensor.uniform(num_squeezed_channels, oup, 1, 1)
       self._se_reduce_bias = Tensor.zeros(num_squeezed_channels)
-      self._se_expand = Tensor.zeros(oup, num_squeezed_channels, 1, 1)
+      self._se_expand = Tensor.uniform(oup, num_squeezed_channels, 1, 1)
       self._se_expand_bias = Tensor.zeros(oup)
 
-    self._project_conv = Tensor.zeros(output_filters, oup, 1, 1)
+    self._project_conv = Tensor.uniform(output_filters, oup, 1, 1)
     self._bn2 = BatchNorm2D(output_filters)
 
   def __call__(self, inputs):
@@ -148,7 +148,7 @@ class EfficientNet:
       return int(math.ceil(global_params[1] * repeats))
 
     out_channels = round_filters(32)
-    self._conv_stem = Tensor.zeros(out_channels, 3, 3, 3)
+    self._conv_stem = Tensor.uniform(out_channels, 3, 3, 3)
     self._bn0 = BatchNorm2D(out_channels)
     blocks_args = [
       [1, 3, (1,1), 1, 32, 16, 0.25],
@@ -172,9 +172,9 @@ class EfficientNet:
 
     in_channels = round_filters(320)
     out_channels = round_filters(1280)
-    self._conv_head = Tensor.zeros(out_channels, in_channels, 1, 1)
+    self._conv_head = Tensor.uniform(out_channels, in_channels, 1, 1)
     self._bn1 = BatchNorm2D(out_channels)
-    self._fc = Tensor.zeros(out_channels, classes)
+    self._fc = Tensor.uniform(out_channels, classes)
     self._fc_bias = Tensor.zeros(classes)
 
   def forward(self, x):
diff --git a/tinygrad/nn.py b/tinygrad/nn.py
index b06f0b0537..f79b9408a5 100644
--- a/tinygrad/nn.py
+++ b/tinygrad/nn.py
@@ -3,12 +3,12 @@ from tinygrad.tensor import Tensor
 class BatchNorm2D:
   def __init__(self, sz, eps=0.001):
     self.eps = eps
-    self.weight = Tensor.zeros(sz)
+    self.weight = Tensor.ones(sz)
     self.bias = Tensor.zeros(sz)
 
     # TODO: need running_mean and running_var
     self.running_mean = Tensor.zeros(sz)
-    self.running_var = Tensor.zeros(sz)
+    self.running_var = Tensor.ones(sz)
     self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
 
   def __call__(self, x):
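
For context on why the patch swaps zero init for uniform init and makes BatchNorm2D's scale and running variance default to one, here is a minimal numpy sketch, independent of tinygrad (it assumes nothing about Tensor.uniform beyond it being a non-zero random init):

import numpy as np

# Zero-initialized weights: the forward pass is identically zero and the
# gradient flowing back through the layer (dout @ W.T) is zero as well,
# so no layer upstream of it ever receives a learning signal.
x = np.random.randn(4, 8).astype(np.float32)
W_zeros = np.zeros((8, 8), dtype=np.float32)
assert (x @ W_zeros).sum() == 0.0            # dead forward pass
dout = np.ones((4, 8), dtype=np.float32)
assert (dout @ W_zeros.T).sum() == 0.0       # dead backward pass

# A uniform init (fan-in scaled here, one common choice) breaks the symmetry.
W_uniform = np.random.uniform(-1.0, 1.0, (8, 8)).astype(np.float32) / np.sqrt(8)
assert (x @ W_uniform).std() > 0.0           # activations actually flow

# BatchNorm at inference: y = (x - mean) / sqrt(var + eps) * weight + bias.
# With weight=0 the output collapses to bias; with running_var=0 it divides
# by sqrt(eps). weight=1 and running_var=1 make a freshly constructed
# BatchNorm a near-identity, which matches PyTorch's defaults.
eps = 0.001
def bn_inference(x, weight, bias, mean, var):
  return (x - mean) / np.sqrt(var + eps) * weight + bias

y = bn_inference(x, weight=np.ones(8, dtype=np.float32),
                 bias=np.zeros(8, dtype=np.float32),
                 mean=np.zeros(8, dtype=np.float32),
                 var=np.ones(8, dtype=np.float32))
assert np.allclose(y, x, atol=1e-2)          # ~identity at init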