diff --git a/extra/efficientnet.py b/extra/efficientnet.py
index 46c8b514fd..ba9561537e 100644
--- a/extra/efficientnet.py
+++ b/extra/efficientnet.py
@@ -73,7 +73,7 @@ class MBConvBlock:
   def __init__(self, kernel_size, strides, expand_ratio, input_filters, output_filters, se_ratio, has_se):
     oup = expand_ratio * input_filters
     if expand_ratio != 1:
-      self._expand_conv = Tensor.zeros(oup, input_filters, 1, 1)
+      self._expand_conv = Tensor.uniform(oup, input_filters, 1, 1)
       self._bn0 = BatchNorm2D(oup)
     else:
       self._expand_conv = None
@@ -84,18 +84,18 @@ class MBConvBlock:
     else:
       self.pad = [(kernel_size-1)//2]*4
 
-    self._depthwise_conv = Tensor.zeros(oup, 1, kernel_size, kernel_size)
+    self._depthwise_conv = Tensor.uniform(oup, 1, kernel_size, kernel_size)
     self._bn1 = BatchNorm2D(oup)
 
     self.has_se = has_se
     if self.has_se:
       num_squeezed_channels = max(1, int(input_filters * se_ratio))
-      self._se_reduce = Tensor.zeros(num_squeezed_channels, oup, 1, 1)
+      self._se_reduce = Tensor.uniform(num_squeezed_channels, oup, 1, 1)
       self._se_reduce_bias = Tensor.zeros(num_squeezed_channels)
-      self._se_expand = Tensor.zeros(oup, num_squeezed_channels, 1, 1)
+      self._se_expand = Tensor.uniform(oup, num_squeezed_channels, 1, 1)
       self._se_expand_bias = Tensor.zeros(oup)
 
-    self._project_conv = Tensor.zeros(output_filters, oup, 1, 1)
+    self._project_conv = Tensor.uniform(output_filters, oup, 1, 1)
     self._bn2 = BatchNorm2D(output_filters)
 
   def __call__(self, inputs):
@@ -148,7 +148,7 @@ class EfficientNet:
       return int(math.ceil(global_params[1] * repeats))
 
     out_channels = round_filters(32)
-    self._conv_stem = Tensor.zeros(out_channels, 3, 3, 3)
+    self._conv_stem = Tensor.uniform(out_channels, 3, 3, 3)
     self._bn0 = BatchNorm2D(out_channels)
     blocks_args = [
       [1, 3, (1,1), 1, 32, 16, 0.25],
@@ -172,9 +172,9 @@ class EfficientNet:
 
     in_channels = round_filters(320)
     out_channels = round_filters(1280)
-    self._conv_head = Tensor.zeros(out_channels, in_channels, 1, 1)
+    self._conv_head = Tensor.uniform(out_channels, in_channels, 1, 1)
     self._bn1 = BatchNorm2D(out_channels)
-    self._fc = Tensor.zeros(out_channels, classes)
+    self._fc = Tensor.uniform(out_channels, classes)
     self._fc_bias = Tensor.zeros(classes)
 
   def forward(self, x):
diff --git a/tinygrad/nn.py b/tinygrad/nn.py
index b06f0b0537..f79b9408a5 100644
--- a/tinygrad/nn.py
+++ b/tinygrad/nn.py
@@ -3,12 +3,12 @@ from tinygrad.tensor import Tensor
 class BatchNorm2D:
   def __init__(self, sz, eps=0.001):
     self.eps = eps
-    self.weight = Tensor.zeros(sz)
+    self.weight = Tensor.ones(sz)
     self.bias = Tensor.zeros(sz)
 
     # TODO: need running_mean and running_var
     self.running_mean = Tensor.zeros(sz)
-    self.running_var = Tensor.zeros(sz)
+    self.running_var = Tensor.ones(sz)
     self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
 
   def __call__(self, x):
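
For context on why the patch swaps zero init for uniform init and makes BatchNorm2D's scale and running variance default to one, here is a minimal numpy sketch, independent of tinygrad (it assumes nothing about Tensor.uniform beyond it being a non-zero random init):

import numpy as np

# Zero-initialized weights: the forward pass is identically zero and the
# gradient flowing back through the layer (dout @ W.T) is zero as well,
# so no layer upstream of it ever receives a learning signal.
x = np.random.randn(4, 8).astype(np.float32)
W_zeros = np.zeros((8, 8), dtype=np.float32)
assert (x @ W_zeros).sum() == 0.0            # dead forward pass
dout = np.ones((4, 8), dtype=np.float32)
assert (dout @ W_zeros.T).sum() == 0.0       # dead backward pass

# A uniform init (fan-in scaled here, one common choice) breaks the symmetry.
W_uniform = np.random.uniform(-1.0, 1.0, (8, 8)).astype(np.float32) / np.sqrt(8)
assert (x @ W_uniform).std() > 0.0           # activations actually flow

# BatchNorm at inference: y = (x - mean) / sqrt(var + eps) * weight + bias.
# With weight=0 the output collapses to bias; with running_var=0 it divides
# by sqrt(eps). weight=1 and running_var=1 make a freshly constructed
# BatchNorm a near-identity, which matches PyTorch's defaults.
eps = 0.001
def bn_inference(x, weight, bias, mean, var):
  return (x - mean) / np.sqrt(var + eps) * weight + bias

y = bn_inference(x, weight=np.ones(8, dtype=np.float32),
                 bias=np.zeros(8, dtype=np.float32),
                 mean=np.zeros(8, dtype=np.float32),
                 var=np.ones(8, dtype=np.float32))
assert np.allclose(y, x, atol=1e-2)          # ~identity at init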