mirror of https://github.com/tinygrad/tinygrad.git

make anchors use Tensors
@@ -1,6 +1,6 @@
 from collections import OrderedDict
 import unicodedata
-from typing import Optional, Tuple
+from typing import Optional, Tuple, List
 import numpy as np
 from tinygrad.nn import state
 from tinygrad.tensor import Tensor, dtypes
@@ -275,3 +275,37 @@ def box_iou(boxes1:np.ndarray, boxes2:np.ndarray) -> np.ndarray:
 
   inter, union = _box_inter_union(boxes1, boxes2)
   return inter / union
+
+def generate_anchors(input_size:Tuple[int, int], batch_size:int = 1, scales:Optional[Tuple[Tensor, ...]] = None, aspect_ratios:Optional[Tuple[Tensor, ...]] = None) -> List[Tensor]:
+  def _compute_grid_sizes(input_size:Tuple[int, int]) -> np.ndarray:
+    return np.ceil(np.array(input_size)[None, :] / 2 ** np.arange(3, 8)[:, None])
+
+  scales = tuple(Tensor((i, int(i * 2 ** (1/3)), int(i * 2 ** (2/3)))) for i in 2 ** np.arange(5, 10)) if scales is None else scales
+  aspect_ratios = ((0.5, 1.0, 2.0),) * len(scales) if aspect_ratios is None else aspect_ratios
+  aspect_ratios = tuple(Tensor(ar) for ar in aspect_ratios)
+  grid_sizes = _compute_grid_sizes(input_size)
+
+  assert len(scales) == len(aspect_ratios) == len(grid_sizes), "scales, aspect_ratios, and grid_sizes must have the same length"
+
+  anchors_over_all_feature_maps = []
+  for s, ar, gs in zip(scales, aspect_ratios, grid_sizes):
+    h_ratios = ar.sqrt()
+    w_ratios = 1 / h_ratios
+    ws = (w_ratios[:, None] * s[None, :]).reshape(-1)
+    hs = (h_ratios[:, None] * s[None, :]).reshape(-1)
+    base_anchors = (Tensor.stack(-ws, -hs, ws, hs, dim=1) / 2).round()
+    stride_h, stride_w = input_size[0] // gs[0], input_size[1] // gs[1]
+    shifts_x, shifts_y = (Tensor.arange(gs[1]) * stride_w).meshgrid(Tensor.arange(gs[0]) * stride_h, indexing="xy")
+    shifts_x, shifts_y = shifts_x.reshape(-1), shifts_y.reshape(-1)
+    shifts = Tensor.stack(shifts_x, shifts_y, shifts_x, shifts_y, dim=1)
+    anchors_over_all_feature_maps.append((shifts[:, None] + base_anchors[None, :]).reshape(-1, 4))
+
+  if batch_size > 1:
+    anchors = []
+    for _ in range(batch_size):
+      anchors_in_img = [a for a in anchors_over_all_feature_maps]
+      anchors.append(anchors_in_img)
+
+    return [Tensor.cat(*anchors_per_img) for anchors_per_img in anchors]
+
+  return anchors_over_all_feature_maps
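For context (not part of the commit): a minimal usage sketch of the new Tensor-based generate_anchors, assuming the defaults above (five FPN levels, 3 scales x 3 aspect ratios = 9 anchors per grid cell, 800x800 input).

# Usage sketch only; shapes follow from _compute_grid_sizes: ceil(800 / 2**k) for k in 3..7.
from examples.mlperf.helpers import generate_anchors

anchors = generate_anchors((800, 800))  # batch_size defaults to 1
assert len(anchors) == 5                # one Tensor of (x1, y1, x2, y2) boxes per FPN level
assert anchors[0].shape == (90000, 4)   # 100*100 grid cells * 9 anchors at stride 8

With batch_size > 1, the function instead returns one Tensor per image, with all levels concatenated via Tensor.cat.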
@@ -206,7 +206,7 @@ def resize(img:Image, tgt:Optional[Dict[str, Union[np.ndarray, Tuple]]]=None, si
 
   return img, img_size
 
-def normalize(img:Tensor, device:List[str]):
+def normalize(img:Tensor, device:Optional[List[str]] = None):
   mean = Tensor([0.485, 0.456, 0.406], device=device, dtype=dtypes.float32).reshape(1, -1, 1, 1)
   std = Tensor([0.229, 0.224, 0.225], device=device, dtype=dtypes.float32).reshape(1, -1, 1, 1)
   img = ((img.permute([0, 3, 1, 2]) / 255.0) - mean) / std
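A sketch of calling normalize after the signature change (an assumption, since the function's return sits outside this hunk; the NHWC input layout is implied by the permute([0, 3, 1, 2]) above):

# Illustrative call; assumes normalize returns the normalized img.
from tinygrad import Tensor

batch = Tensor.rand(2, 800, 800, 3) * 255  # NHWC images in [0, 255], per the / 255.0 above
out = normalize(batch)                     # device=None keeps tensors on the default device
# out is NCHW, scaled to [0, 1], then standardized with the ImageNet mean/std constants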
@@ -4,6 +4,7 @@ import math
 from tinygrad import Tensor, dtypes
 from tinygrad.helpers import flatten, get_child
 import tinygrad.nn as nn
+from examples.mlperf.helpers import generate_anchors
 from examples.mlperf.initializers import Conv2dNormal, Conv2dKaimingUniform
 from examples.mlperf.losses import sigmoid_focal_loss
 from extra.models.helpers import meshgrid, nms
@@ -20,24 +21,6 @@ def decode_bbox(offsets, anchors):
   pred_x2, pred_y2 = pred_cx + 0.5 * pred_w, pred_cy + 0.5 * pred_h
   return np.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=1, dtype=np.float32)
 
-def generate_anchors(input_size, grid_sizes, scales, aspect_ratios):
-  assert len(scales) == len(aspect_ratios) == len(grid_sizes)
-  anchors = []
-  for s, ar, gs in zip(scales, aspect_ratios, grid_sizes):
-    s, ar = np.array(s), np.array(ar)
-    h_ratios = np.sqrt(ar)
-    w_ratios = 1 / h_ratios
-    ws = (w_ratios[:, None] * s[None, :]).reshape(-1)
-    hs = (h_ratios[:, None] * s[None, :]).reshape(-1)
-    base_anchors = (np.stack([-ws, -hs, ws, hs], axis=1) / 2).round()
-    stride_h, stride_w = input_size[0] // gs[0], input_size[1] // gs[1]
-    shifts_x, shifts_y = np.meshgrid(np.arange(gs[1]) * stride_w, np.arange(gs[0]) * stride_h)
-    shifts_x = shifts_x.reshape(-1)
-    shifts_y = shifts_y.reshape(-1)
-    shifts = np.stack([shifts_x, shifts_y, shifts_x, shifts_y], axis=1, dtype=np.float32)
-    anchors.append((shifts[:, None] + base_anchors[None, :]).reshape(-1, 4))
-  return anchors
-
 class RetinaNet:
   def __init__(self, backbone: ResNet, num_classes=264, num_anchors=9, scales=None, aspect_ratios=None):
     assert isinstance(backbone, ResNet)
@@ -48,7 +31,6 @@ class RetinaNet:
 
     self.backbone = ResNetFPN(backbone)
     self.head = RetinaHead(self.backbone.out_channels, num_anchors=num_anchors, num_classes=num_classes)
-    self.anchor_gen = lambda input_size: generate_anchors(input_size, self.backbone.compute_grid_sizes(input_size), scales, aspect_ratios)
 
   def __call__(self, x:Tensor, y:Optional[Tensor] = None, matches:Optional[Tensor] = None):
     return self.forward(x, y=y, matches=matches)
@@ -73,7 +55,7 @@
 
   # predictions: (BS, (H1W1+...+HmWm)A, 4 + K)
   def postprocess_detections(self, predictions, input_size=(800, 800), image_sizes=None, orig_image_sizes=None, score_thresh=0.05, topk_candidates=1000, nms_thresh=0.5):
-    anchors = self.anchor_gen(input_size)
+    anchors = generate_anchors(input_size)
     grid_sizes = self.backbone.compute_grid_sizes(input_size)
     split_idx = np.cumsum([int(self.num_anchors * sz[0] * sz[1]) for sz in grid_sizes[:-1]])
     detections = []
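To illustrate the layout in the (BS, (H1W1+...+HmWm)A, 4 + K) comment (a numpy sketch with grid sizes assumed for an 800x800 input, not code from the commit): split_idx holds the cumulative per-level anchor counts, so one image's flattened predictions can be carved back into per-FPN-level chunks.

import numpy as np

grid_sizes = [(100, 100), (50, 50), (25, 25), (13, 13), (7, 7)]  # ceil(800 / 2**k), k = 3..7
counts = [9 * h * w for h, w in grid_sizes]    # num_anchors = 9 per grid cell
split_idx = np.cumsum(counts[:-1])             # boundaries; the last level needs none
offsets = np.zeros((sum(counts), 4))           # one image's flattened box offsets
per_level = np.split(offsets, split_idx)
print([p.shape[0] for p in per_level])         # [90000, 22500, 5625, 1521, 441]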
@@ -86,6 +68,8 @@
 
     image_boxes, image_scores, image_labels = [], [], []
     for offsets_per_level, scores_per_level, anchors_per_level in zip(offsets_per_image, scores_per_image, anchors):
+      anchors_per_level = anchors_per_level.numpy()
+
       # remove low scoring boxes
       scores_per_level = scores_per_level.flatten()
       keep_idxs = scores_per_level > score_thresh
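The hunk cuts off mid-loop; for context, a minimal numpy sketch (names and values illustrative, not from the commit) of the filtering pattern it begins: threshold the scores first, then cap the survivors at topk_candidates.

import numpy as np

scores = np.array([0.9, 0.01, 0.3, 0.07, 0.6])
keep = np.where(scores > 0.05)[0]              # drop low-scoring boxes (score_thresh)
topk = keep[np.argsort(-scores[keep])][:1000]  # best-first, capped at topk_candidates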