use tinynn for Conv2d

George Hotz
2021-10-30 19:40:44 -07:00
parent 6bee5bdb7d
commit 121d5a17ee
4 changed files with 7 additions and 37 deletions

.gitignore

@@ -8,3 +8,4 @@ dist
 *.egg-info
 /env
 a.out
+boxes.jpg

README.md

@@ -12,9 +12,9 @@ This may not be the best deep learning framework, but it is a deep learning framework
 The core of it is in `tinygrad/`
-Due to its extreme simplicity, it aims to be the easiest framework to add new accelerators to, with support for both inference and training. Support the simple basic ops, and you get SOTA [vision](https://arxiv.org/abs/1905.11946) `models/efficientnet.py` and [language](https://arxiv.org/abs/1706.03762) `models/transformer.py` models. We are working on support for the Apple Neural Engine.
+Due to its extreme simplicity, it aims to be the easiest framework to add new accelerators to, with support for both inference and training. Support the simple basic ops, and you get SOTA [vision](https://arxiv.org/abs/1905.11946) `models/efficientnet.py` and [language](https://arxiv.org/abs/1706.03762) `models/transformer.py` models.
-Eventually, [we will build custom hardware](https://geohot.github.io/blog/jekyll/update/2021/06/13/a-breakdown-of-ai-chip-companies.html) for tinygrad, and it will be blindingly fast. Now, it is slow.
+We are working on support for the Apple Neural Engine and the Google TPU in the `accel/` folder. Eventually, [we will build custom hardware](https://geohot.github.io/blog/jekyll/update/2021/06/13/a-breakdown-of-ai-chip-companies.html) for tinygrad, and it will be blindingly fast. Now, it is slow.
 ### Installation
@@ -94,7 +94,7 @@ from tinygrad.tensor import Tensor
 (Tensor.ones(4,4).gpu() + Tensor.ones(4,4).gpu()).cpu()
 ```
-### ANE Support?!
+### ANE Support?! (broken)
 If all you want to do is ReLU, you are in luck! You can do very fast ReLU (at least 30 MEGAReLUs/sec confirmed)
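For reference, the op the (now "broken") ANE path accelerates is just an elementwise max with zero. A minimal sketch of the same op on the default backend, using only the `Tensor` API that appears elsewhere in this diff:

```python
import numpy as np
from tinygrad.tensor import Tensor

# ReLU is elementwise max(x, 0); the ANE path just runs this op very fast.
x = Tensor(np.array([[-2.0, -1.0, 0.0, 1.0, 2.0]], dtype=np.float32))
print(x.relu())  # zeros where the input was negative, identity elsewhere
```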

examples/yolo_nn.py

@@ -56,34 +56,3 @@ class LeakyReLU:
   def __call__(self, input):
     return input.leakyrelu(self.neg_slope)
-
-class Conv2d:
-  def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, bias=True):
-    self.in_channels, self.out_channels, self.stride, self.padding, self.groups, self.bias = in_channels, out_channels, stride, padding, groups, bias # Wow this is terrible
-    self.kernel_size = (kernel_size, kernel_size) if isinstance(kernel_size, int) else kernel_size
-    assert out_channels % groups == 0 and in_channels % groups == 0
-    self.weight = Tensor.uniform(out_channels, in_channels // groups, *self.kernel_size)
-    if self.bias:
-      self.bias = Tensor.uniform(1, out_channels, 1, 1)
-    else:
-      self.bias = None
-
-  def __repr__(self):
-    return f"Conv2d({self.in_channels!r}, {self.out_channels!r}, kernel_size={self.kernel_size!r} stride={self.stride!r}"
-
-  def __call__(self, x):
-    if self.padding != 0:
-      if self.bias is not None:
-        x = x.pad2d(padding=[self.padding] * 4).conv2d(self.weight, stride=self.stride, groups=self.groups).add(self.bias)
-      else:
-        x = x.pad2d(padding=[self.padding] * 4).conv2d(self.weight, stride=self.stride, groups=self.groups)
-    else:
-      if self.bias is not None:
-        x = x.conv2d(self.weight, stride=self.stride, groups=self.groups).add(self.bias)
-      else:
-        x = x.conv2d(self.weight, stride=self.stride, groups=self.groups)
-    return x
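The deleted class is replaced one-for-one by `tinygrad.nn.Conv2d` (see the import change in the next file). A minimal smoke test of the swap, assuming the library class keeps the same constructor and `__call__` interface as the deleted one, which the unchanged call sites imply:

```python
import numpy as np
from tinygrad.tensor import Tensor
from tinygrad.nn import Conv2d  # the replacement for the deleted local class

# Assumed-identical interface: Conv2d(in_channels, out_channels, kernel_size, ...)
conv = Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
x = Tensor(np.zeros((1, 3, 32, 32), dtype=np.float32))
y = conv(x)      # call sites stay unchanged, only the import moves
print(y.shape)   # (1, 16, 32, 32): a 3x3 kernel with padding=1 preserves H and W
```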

examples/yolov3.py

@@ -10,8 +10,8 @@ import numpy as np
 np.set_printoptions(suppress=True)
 from tinygrad.tensor import Tensor
 from extra.utils import fetch, get_parameters
-from yolo_nn import Conv2d, Upsample, EmptyLayer, DetectionLayer, LeakyReLU, MaxPool2d
-from tinygrad.nn import BatchNorm2D
+from yolo_nn import Upsample, EmptyLayer, DetectionLayer, LeakyReLU, MaxPool2d
+from tinygrad.nn import BatchNorm2D, Conv2d
 import cv2
 from PIL import Image
@@ -241,7 +241,7 @@ def infer(model, img):
   img = img[:,:,::-1].transpose((2,0,1))
   img = img[np.newaxis,:,:,:]/255.0
-  prediction = model.forward(Tensor(img))
+  prediction = model.forward(Tensor(img.astype(np.float32)))
   return prediction
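The `astype` fix addresses a dtype promotion: OpenCV decodes images as `uint8`, and dividing by `255.0` promotes the array to `float64`, while tinygrad works in `float32`. A standalone illustration of the promotion this change guards against (the 416x416 shape is just an example input size):

```python
import numpy as np

img = np.zeros((1, 3, 416, 416), dtype=np.uint8)  # e.g. a decoded frame
scaled = img / 255.0
print(scaled.dtype)                      # float64: true division promotes uint8
print(scaled.astype(np.float32).dtype)   # float32: what Tensor expects here
```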