Mirror of https://github.com/tinygrad/tinygrad.git (synced 2026-01-09 23:18:04 -05:00)
BatchNorm2D -> BatchNorm2d (#558)
* BatchNorm2D -> BatchNorm2d
* Fix typo
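For downstream code the rename is mechanical: import and construct `BatchNorm2d` instead of `BatchNorm2D`; the constructor arguments are unchanged. A minimal sketch of an updated call site, modeled on the small test models touched by this diff (the `TinyBlock` class below is illustrative and not part of the repo):

```python
from tinygrad.tensor import Tensor
from tinygrad.nn import Conv2d, BatchNorm2d  # previously imported as BatchNorm2D

class TinyBlock:
  # illustrative conv -> batchnorm -> relu block, mirroring the models in the tests below
  def __init__(self):
    self.c = Conv2d(3, 32, 3, padding=1, bias=False)
    self.bn = BatchNorm2d(32, track_running_stats=False)  # only the class name changed
  def __call__(self, x):
    return self.bn(self.c(x)).relu()

out = TinyBlock()(Tensor.ones(1, 3, 8, 8))
```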
@@ -20,7 +20,7 @@ class ConvGroup:
   def __init__(self, channels_in, channels_out, short, se=True):
     self.short, self.se = short, se and not short
     self.conv = [nn.Conv2d(channels_in if i == 0 else channels_out, channels_out, kernel_size=3, padding=1, bias=False) for i in range(1 if short else 3)]
-    self.norm = [nn.BatchNorm2D(channels_out, track_running_stats=False, eps=1e-12, momentum=0.8) for _ in range(1 if short else 3)]
+    self.norm = [nn.BatchNorm2d(channels_out, track_running_stats=False, eps=1e-12, momentum=0.8) for _ in range(1 if short else 3)]
     if self.se: self.se1, self.se2 = nn.Linear(channels_out, channels_out//16), nn.Linear(channels_out//16, channels_out)

   def __call__(self, x):
@@ -38,7 +38,7 @@ class SpeedyResNet:
     # TODO: add whitening
     self.net = [
       nn.Conv2d(3, 64, kernel_size=1),
-      nn.BatchNorm2D(64, track_running_stats=False, eps=1e-12, momentum=0.8),
+      nn.BatchNorm2d(64, track_running_stats=False, eps=1e-12, momentum=0.8),
       lambda x: x.relu(),
       ConvGroup(64, 128, short=False),
       ConvGroup(128, 256, short=True),
@@ -3,7 +3,7 @@
 import sys
 import numpy as np
 from tinygrad.tensor import Tensor
-from tinygrad.nn import BatchNorm2D, optim
+from tinygrad.nn import BatchNorm2d, optim
 from tinygrad.helpers import getenv
 from datasets import fetch_mnist
 from extra.augment import augment_img
@@ -39,7 +39,7 @@ class ConvBlock:
     self.cweights = [Tensor.uniform(filters, inp if i==0 else filters, conv, conv) for i in range(3)]
     self.cbiases = [Tensor.uniform(1, filters, 1, 1) for i in range(3)]
     #init layers
-    self._bn = BatchNorm2D(128)
+    self._bn = BatchNorm2d(128)
     self._seb = SqueezeExciteBlock2D(filters)

   def __call__(self, input):
@@ -7,7 +7,7 @@ import cv2
 import numpy as np
 from PIL import Image
 from tinygrad.tensor import Tensor
-from tinygrad.nn import BatchNorm2D, Conv2d
+from tinygrad.nn import BatchNorm2d, Conv2d
 from tinygrad.helpers import getenv
 from extra.utils import fetch, get_parameters
 from examples.yolo.yolo_nn import Upsample, EmptyLayer, DetectionLayer, LeakyReLU, MaxPool2d
@@ -350,7 +350,7 @@ class Darknet:

       # BatchNorm2d
       if batch_normalize:
-        bn = BatchNorm2D(filters, eps=1e-05, track_running_stats=True)
+        bn = BatchNorm2d(filters, eps=1e-05, track_running_stats=True)
         module.append(bn)

       # LeakyReLU activation
@@ -1,7 +1,7 @@
 import math
 import numpy as np
 from tinygrad.tensor import Tensor
-from tinygrad.nn import BatchNorm2D
+from tinygrad.nn import BatchNorm2d
 from extra.utils import fetch, fake_torch_load, get_child

 class MBConvBlock:
@@ -9,7 +9,7 @@ class MBConvBlock:
     oup = expand_ratio * input_filters
     if expand_ratio != 1:
       self._expand_conv = Tensor.glorot_uniform(oup, input_filters, 1, 1)
-      self._bn0 = BatchNorm2D(oup, track_running_stats=track_running_stats)
+      self._bn0 = BatchNorm2d(oup, track_running_stats=track_running_stats)
     else:
       self._expand_conv = None

@@ -20,7 +20,7 @@ class MBConvBlock:
     self.pad = [(kernel_size-1)//2]*4

     self._depthwise_conv = Tensor.glorot_uniform(oup, 1, kernel_size, kernel_size)
-    self._bn1 = BatchNorm2D(oup, track_running_stats=track_running_stats)
+    self._bn1 = BatchNorm2d(oup, track_running_stats=track_running_stats)

     self.has_se = has_se
     if self.has_se:
@@ -31,7 +31,7 @@ class MBConvBlock:
       self._se_expand_bias = Tensor.zeros(oup)

     self._project_conv = Tensor.glorot_uniform(output_filters, oup, 1, 1)
-    self._bn2 = BatchNorm2D(output_filters, track_running_stats=track_running_stats)
+    self._bn2 = BatchNorm2d(output_filters, track_running_stats=track_running_stats)

   def __call__(self, inputs):
     x = inputs
@@ -82,7 +82,7 @@ class EfficientNet:

     out_channels = round_filters(32)
     self._conv_stem = Tensor.glorot_uniform(out_channels, input_channels, 3, 3)
-    self._bn0 = BatchNorm2D(out_channels, track_running_stats=track_running_stats)
+    self._bn0 = BatchNorm2d(out_channels, track_running_stats=track_running_stats)
     blocks_args = [
       [1, 3, (1,1), 1, 32, 16, 0.25],
       [2, 3, (2,2), 6, 16, 24, 0.25],
@@ -116,7 +116,7 @@ class EfficientNet:
     in_channels = round_filters(320)
     out_channels = round_filters(1280)
     self._conv_head = Tensor.glorot_uniform(out_channels, in_channels, 1, 1)
-    self._bn1 = BatchNorm2D(out_channels, track_running_stats=track_running_stats)
+    self._bn1 = BatchNorm2d(out_channels, track_running_stats=track_running_stats)
     if has_fc_output:
       self._fc = Tensor.glorot_uniform(out_channels, classes)
       self._fc_bias = Tensor.zeros(classes)
@@ -8,14 +8,14 @@ class BasicBlock:

   def __init__(self, in_planes, planes, stride=1):
     self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
-    self.bn1 = nn.BatchNorm2D(planes)
+    self.bn1 = nn.BatchNorm2d(planes)
     self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, stride=1, bias=False)
-    self.bn2 = nn.BatchNorm2D(planes)
+    self.bn2 = nn.BatchNorm2d(planes)
     self.downsample = []
     if stride != 1 or in_planes != self.expansion*planes:
       self.downsample = [
         nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
-        nn.BatchNorm2D(self.expansion*planes)
+        nn.BatchNorm2d(self.expansion*planes)
       ]

   def __call__(self, x):
@@ -31,16 +31,16 @@ class Bottleneck:

   def __init__(self, in_planes, planes, stride=1):
     self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
-    self.bn1 = nn.BatchNorm2D(planes)
+    self.bn1 = nn.BatchNorm2d(planes)
     self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, stride=stride, bias=False)
-    self.bn2 = nn.BatchNorm2D(planes)
+    self.bn2 = nn.BatchNorm2d(planes)
     self.conv3 = nn.Conv2d(planes, self.expansion *planes, kernel_size=1, bias=False)
-    self.bn3 = nn.BatchNorm2D(self.expansion*planes)
+    self.bn3 = nn.BatchNorm2d(self.expansion*planes)
     self.downsample = []
     if stride != 1 or in_planes != self.expansion*planes:
       self.downsample = [
         nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
-        nn.BatchNorm2D(self.expansion*planes)
+        nn.BatchNorm2d(self.expansion*planes)
       ]

   def __call__(self, x):
@@ -75,7 +75,7 @@ class ResNet:
     self.in_planes = 64

     self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, bias=False, padding=3)
-    self.bn1 = nn.BatchNorm2D(64)
+    self.bn1 = nn.BatchNorm2d(64)
     self.layer1 = self._make_layer(self.block, 64, self.num_blocks[0], stride=2)
     self.layer2 = self._make_layer(self.block, 128, self.num_blocks[1], stride=2)
     self.layer3 = self._make_layer(self.block, 256, self.num_blocks[2], stride=2)
@@ -46,7 +46,7 @@ class TestOpt(unittest.TestCase):
     # TODO: with Tensor.training
     Tensor.training = True
     img = Tensor.ones(1,32,4,4)
-    bn = nn.BatchNorm2D(32, track_running_stats=False)
+    bn = nn.BatchNorm2d(32, track_running_stats=False)
     with CLCache():
       img_bn = bn(img).realize()
       print(img_bn)
@@ -73,7 +73,7 @@ class TestOpt(unittest.TestCase):
     Tensor.training = True
     img = Tensor.ones(1,3,4,4)
     c1 = nn.Conv2d(3,32,3)
-    bn = nn.BatchNorm2D(32, track_running_stats=False)
+    bn = nn.BatchNorm2d(32, track_running_stats=False)
     opt = optim.SGD(optim.get_parameters([c1, bn]))
     with CLCache():
       img_bn = bn(c1(img)).elu().sum()
@@ -86,7 +86,7 @@ class TestOpt(unittest.TestCase):
   def test_fold_conv_batchnorm_notrain(self):
     img = Tensor.ones(1,3,8,8)
     c1 = nn.Conv2d(3,32,3)
-    bn = nn.BatchNorm2D(32, track_running_stats=False)
+    bn = nn.BatchNorm2d(32, track_running_stats=False)
     # precache the bn
     img_conv = bn(c1(img)).relu().realize()
     with CLCache():
@@ -97,7 +97,7 @@ class TestOpt(unittest.TestCase):
     Tensor.training = True
     img = Tensor.ones(1,3,8,8)
     c1 = nn.Conv2d(3,32,3)
-    bn = nn.BatchNorm2D(32, track_running_stats=False)
+    bn = nn.BatchNorm2d(32, track_running_stats=False)
     with CLCache():
       img_conv = bn(c1(img)).relu().realize()
       print(img_conv)
@@ -132,4 +132,4 @@ class TestOpt(unittest.TestCase):
     assert len(GlobalCounters.cache) == 2, "optimizer didn't fold conv/relu"

 if __name__ == '__main__':
-  unittest.main()
+  unittest.main()
@@ -1,5 +1,5 @@
 from tinygrad.tensor import Tensor
-from tinygrad.nn import Conv2d, BatchNorm2D, optim
+from tinygrad.nn import Conv2d, BatchNorm2d, optim
 from extra.utils import get_parameters # TODO: move to optim
 import unittest

@@ -38,9 +38,9 @@ class TestBatchnorm(unittest.TestCase):
     class LilModel:
       def __init__(self):
         self.c = Conv2d(12, 24, 3, padding=1, bias=False)
-        self.bn = BatchNorm2D(24, track_running_stats=False)
+        self.bn = BatchNorm2d(24, track_running_stats=False)
         self.c2 = Conv2d(24, 32, 3, padding=1, bias=False)
-        self.bn2 = BatchNorm2D(32, track_running_stats=False)
+        self.bn2 = BatchNorm2d(32, track_running_stats=False)
       def forward(self, x):
         x = self.bn(self.c(x)).relu()
         return self.bn2(self.c2(x)).relu()
@@ -51,7 +51,7 @@ class TestBatchnorm(unittest.TestCase):
     class LilModel:
       def __init__(self):
         self.c = Conv2d(12, 32, 3, padding=1, bias=False)
-        self.bn = BatchNorm2D(32, track_running_stats=False)
+        self.bn = BatchNorm2d(32, track_running_stats=False)
       def forward(self, x):
         return self.bn(self.c(x)).relu()
     lm = LilModel()
@@ -59,4 +59,4 @@ class TestBatchnorm(unittest.TestCase):


 if __name__ == '__main__':
-  unittest.main()
+  unittest.main()
@@ -2,7 +2,7 @@
 import unittest
 import numpy as np
 from tinygrad.tensor import Tensor, Device
-from tinygrad.nn import BatchNorm2D, Conv2d, Linear, GroupNorm, LayerNorm
+from tinygrad.nn import BatchNorm2d, Conv2d, Linear, GroupNorm, LayerNorm
 import torch

 @unittest.skipUnless(Device.DEFAULT == Device.CPU, "Not Implemented")
@@ -13,7 +13,7 @@ class TestNN(unittest.TestCase):

     # create in tinygrad
     Tensor.training = training
-    bn = BatchNorm2D(sz, eps=1e-5, track_running_stats=training)
+    bn = BatchNorm2d(sz, eps=1e-5, track_running_stats=training)
     bn.weight = Tensor.randn(sz)
     bn.bias = Tensor.randn(sz)
     bn.running_mean = Tensor.randn(sz)
@@ -1,9 +1,8 @@
 from tinygrad.tensor import Tensor

-# TODO: BatchNorm2D -> BatchNorm2d
-class BatchNorm2D:
+class BatchNorm2d:
   def __init__(self, sz, eps=1e-5, affine=True, track_running_stats=True, momentum=0.1):
-    assert affine, "BatchNorm2D is only supported with affine"
+    assert affine, "BatchNorm2d is only supported with affine"
     self.eps, self.track_running_stats, self.momentum = eps, track_running_stats, momentum

     self.weight, self.bias = Tensor.ones(sz), Tensor.zeros(sz)
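The hunk above shows only the head of the renamed class. As a reminder of what the layer computes (independent of this diff), here is a hedged sketch of 2D batch normalization written in plain tensor ops; `batchnorm2d_ref`, its exact reductions, and the use of `axis`/`keepdim` keyword arguments are illustrative assumptions, not the implementation in tinygrad/nn:

```python
from tinygrad.tensor import Tensor

def batchnorm2d_ref(x: Tensor, weight: Tensor, bias: Tensor, eps: float = 1e-5) -> Tensor:
  # per-channel statistics over the batch and spatial axes of an NCHW tensor
  mean = x.mean(axis=(0, 2, 3), keepdim=True)
  var = ((x - mean) * (x - mean)).mean(axis=(0, 2, 3), keepdim=True)
  # normalize, then apply the affine scale and shift (the weight/bias created in __init__ above)
  x_hat = (x - mean) / (var + eps).sqrt()
  return x_hat * weight.reshape(1, -1, 1, 1) + bias.reshape(1, -1, 1, 1)

# usage sketch:
# y = batchnorm2d_ref(Tensor.ones(1, 32, 4, 4), Tensor.ones(32), Tensor.zeros(32))
```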