mirror of https://github.com/tinygrad/tinygrad.git
trim dataloader-related code needed from the reference implementation
139 test/external/mlperf_retinanet/openimages.py (vendored)
@@ -18,23 +18,6 @@ from test.external.mlperf_retinanet.boxes import box_iou
from test.external.mlperf_retinanet.utils import Matcher


def convert_coco_poly_to_mask(segmentations, height, width):
    masks = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        if len(mask.shape) < 3:
            mask = mask[..., None]
        mask = torch.as_tensor(mask, dtype=torch.uint8)
        mask = mask.any(dim=2)
        masks.append(mask)
    if masks:
        masks = torch.stack(masks, dim=0)
    else:
        masks = torch.zeros((0, height, width), dtype=torch.uint8)
    return masks


class ConvertCocoPolysToMask(object):
    def __init__(self, filter_iscrowd=True):
        self.filter_iscrowd = filter_iscrowd
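For orientation: convert_coco_poly_to_mask rasterizes COCO polygon segmentations to RLE via pycocotools, decodes them, and ORs the rings into one binary mask per object. A minimal usage sketch; the polygon coordinates below are made up for illustration:

    # one object described by a single square ring [x0, y0, x1, y1, ...]
    segmentations = [[[10., 10., 40., 10., 40., 40., 10., 40.]]]
    masks = convert_coco_poly_to_mask(segmentations, height=64, width=64)
    assert masks.shape == (1, 64, 64)  # (num_objects, height, width)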
@@ -86,97 +69,6 @@ class ConvertCocoPolysToMask(object):
        return image, target


def _coco_remove_images_without_annotations(dataset, cat_list=None):
    def _has_only_empty_bbox(anno):
        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)

    def _count_visible_keypoints(anno):
        return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)

    min_keypoints_per_image = 10

    def _has_valid_annotation(anno):
        # if it's empty, there is no annotation
        if len(anno) == 0:
            return False
        # if all boxes have close to zero area, there is no annotation
        if _has_only_empty_bbox(anno):
            return False
        # keypoints tasks have a slightly different criterion for considering
        # whether an annotation is valid
        if "keypoints" not in anno[0]:
            return True
        # for keypoint detection tasks, only consider valid those images
        # containing at least min_keypoints_per_image
        if _count_visible_keypoints(anno) >= min_keypoints_per_image:
            return True
        return False

    assert isinstance(dataset, torchvision.datasets.CocoDetection)
    ids = []
    for ds_idx, img_id in enumerate(dataset.ids):
        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
        anno = dataset.coco.loadAnns(ann_ids)
        if cat_list:
            anno = [obj for obj in anno if obj["category_id"] in cat_list]
        if _has_valid_annotation(anno):
            ids.append(ds_idx)

    dataset = torch.utils.data.Subset(dataset, ids)
    return dataset


def convert_to_coco_api(ds):
    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
    ann_id = 1
    dataset = {'images': [], 'categories': [], 'annotations': []}
    categories = set()
    for img_idx in range(len(ds)):
        # find better way to get target
        # targets = ds.get_annotations(img_idx)
        img, targets = ds[img_idx]
        image_id = targets["image_id"].item()
        img_dict = {}
        img_dict['id'] = image_id
        img_dict['height'] = img.shape[-2]
        img_dict['width'] = img.shape[-1]
        dataset['images'].append(img_dict)
        bboxes = targets["boxes"]
        bboxes[:, 2:] -= bboxes[:, :2]
        bboxes = bboxes.tolist()
        labels = targets['labels'].tolist()
        areas = targets['area'].tolist()
        iscrowd = targets['iscrowd'].tolist()
        num_objs = len(bboxes)
        for i in range(num_objs):
            ann = {}
            ann['image_id'] = image_id
            ann['bbox'] = bboxes[i]
            ann['category_id'] = labels[i]
            categories.add(labels[i])
            ann['area'] = areas[i]
            ann['iscrowd'] = iscrowd[i]
            ann['id'] = ann_id
            dataset['annotations'].append(ann)
            ann_id += 1
    dataset['categories'] = [{'id': i} for i in sorted(categories)]
    coco_ds.dataset = dataset
    coco_ds.createIndex()
    return coco_ds
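An aside on the in-place update bboxes[:, 2:] -= bboxes[:, :2] above: the dataset yields boxes as (x1, y1, x2, y2) corners, while COCO annotations store (x, y, width, height), so subtracting the top-left corner from the bottom-right converts formats. A worked example (values made up for illustration):

    import torch
    boxes = torch.tensor([[10., 20., 50., 80.]])  # (x1, y1, x2, y2)
    boxes[:, 2:] -= boxes[:, :2]
    print(boxes)  # tensor([[10., 20., 40., 60.]]), i.e. (x, y, w, h)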

def get_coco_api_from_dataset(dataset):
    for _ in range(10):
        if isinstance(dataset, torchvision.datasets.CocoDetection):
            break
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
    if isinstance(dataset, torchvision.datasets.CocoDetection):
        return dataset.coco
    return convert_to_coco_api(dataset)


class CocoDetection(torchvision.datasets.CocoDetection):
    def __init__(self, img_folder, ann_file, transforms):
        super(CocoDetection, self).__init__(img_folder, ann_file)
@@ -191,31 +83,6 @@ class CocoDetection(torchvision.datasets.CocoDetection):
        return img, target


def get_coco(name, root, image_set, transforms, mode='instances'):
    anno_file_template = "{}_{}2017.json"
    PATHS = {
        "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
        "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
    }

    t = [ConvertCocoPolysToMask(filter_iscrowd=True)]

    if transforms is not None:
        t.append(transforms)
    transforms = T.Compose(t)

    img_folder, ann_file = PATHS[image_set]
    img_folder = os.path.join(root, img_folder)
    ann_file = os.path.join(root, ann_file)

    dataset = CocoDetection(img_folder, ann_file, transforms=transforms)

    if image_set == "train":
        dataset = _coco_remove_images_without_annotations(dataset)

    return dataset


def get_openimages(name, root, image_set, transforms):
    PATHS = {
        "train": os.path.join(root, "train"),
@@ -235,10 +102,8 @@ def get_openimages(name, root, image_set, transforms):

    return dataset

# https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L401
# NOTE: this applies the filtering in https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L117
# and https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L203
# to match tinygrad's dataloader implementation.
def postprocess_targets(targets, anchors):
    proposal_matcher, matched_idxs = Matcher(0.5, 0.4, allow_low_quality_matches=True), []
    for anchors_per_image, targets_per_image in zip(anchors, targets):
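The loop body is cut off by the diff at this point. For orientation only, the upstream pattern referenced in the NOTE computes per-image IoUs between ground-truth boxes and anchors and records the Matcher output; a hedged sketch of that pattern, not code from this commit:

        # sketch: IoU of every ground-truth box against every anchor of this image,
        # then Matcher assigns each anchor the index of its best ground-truth box
        # (or a negative code for below-threshold / between-threshold anchors)
        match_quality_matrix = box_iou(targets_per_image["boxes"], anchors_per_image)
        matched_idxs.append(proposal_matcher(match_quality_matrix))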
14 test/external/mlperf_retinanet/presets.py (vendored)
@@ -10,20 +10,6 @@ class DetectionPresetTrain:
                T.RandomHorizontalFlip(p=hflip_prob),
                T.ToTensor(),
            ])
        elif data_augmentation == 'ssd':
            self.transforms = T.Compose([
                T.RandomPhotometricDistort(),
                T.RandomZoomOut(fill=list(mean)),
                T.RandomIoUCrop(),
                T.RandomHorizontalFlip(p=hflip_prob),
                T.ToTensor(),
            ])
        elif data_augmentation == 'ssdlite':
            self.transforms = T.Compose([
                T.RandomIoUCrop(),
                T.RandomHorizontalFlip(p=hflip_prob),
                T.ToTensor(),
            ])
        else:
            raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"')
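These presets bundle the detection transforms by policy name. A hypothetical usage sketch; the constructor signature is outside this hunk, so the keyword names and mean value below are assumptions inferred from the branch bodies:

    # hypothetical: argument names inferred from the code above, not shown in this diff
    preset = DetectionPresetTrain(data_augmentation='ssd', hflip_prob=0.5, mean=(123., 117., 104.))
    image, target = preset.transforms(image, target)  # Compose here maps (image, target) pairs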
215 test/external/mlperf_retinanet/transforms.py (vendored)
@@ -63,28 +63,6 @@ def get_image_size(img: Tensor) -> List[int]:

    return get_image_size_pil(img)

def get_image_num_channels_tensor(img: Tensor) -> int:
    _assert_image_tensor(img)
    if img.ndim == 2:
        return 1
    elif img.ndim > 2:
        return img.shape[-3]

    raise TypeError(f"Input ndim should be 2 or more. Got {img.ndim}")

@torch.jit.unused
def get_image_num_channels_pil(img: Any) -> int:
    if _is_pil_image(img):
        return len(img.getbands())
    raise TypeError("Unexpected type {}".format(type(img)))

def get_image_num_channels(img: Tensor) -> int:
    if isinstance(img, torch.Tensor):
        return get_image_num_channels_tensor(img)

    return get_image_num_channels_pil(img)
################################################################################

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms
@@ -119,193 +97,6 @@ class ToTensor(nn.Module):
        return image, target


class RandomIoUCrop(nn.Module):
    def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5,
                 max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40):
        super().__init__()
        # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.min_aspect_ratio = min_aspect_ratio
        self.max_aspect_ratio = max_aspect_ratio
        if sampler_options is None:
            sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
        self.options = sampler_options
        self.trials = trials

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if target is None:
            raise ValueError("The targets can't be None for this transform.")

        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        orig_w, orig_h = get_image_size(image)

        while True:
            # sample an option
            idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
            min_jaccard_overlap = self.options[idx]
            if min_jaccard_overlap >= 1.0:  # a value larger than 1 encodes the leave as-is option
                return image, target

            for _ in range(self.trials):
                # check the aspect ratio limitations
                r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
                new_w = int(orig_w * r[0])
                new_h = int(orig_h * r[1])
                aspect_ratio = new_w / new_h
                if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
                    continue

                # check for 0 area crops
                r = torch.rand(2)
                left = int((orig_w - new_w) * r[0])
                top = int((orig_h - new_h) * r[1])
                right = left + new_w
                bottom = top + new_h
                if left == right or top == bottom:
                    continue

                # check for any valid boxes with centers within the crop area
                cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
                cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
                is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
                if not is_within_crop_area.any():
                    continue

                # check at least 1 box with jaccard limitations
                boxes = target["boxes"][is_within_crop_area]
                ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]],
                                                     dtype=boxes.dtype, device=boxes.device))
                if ious.max() < min_jaccard_overlap:
                    continue

                # keep only valid boxes and perform cropping
                target["boxes"] = boxes
                target["labels"] = target["labels"][is_within_crop_area]
                target["boxes"][:, 0::2] -= left
                target["boxes"][:, 1::2] -= top
                target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
                target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
                image = F.crop(image, top, left, new_h, new_w)

                return image, target

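The sampling loop above implements SSD-style min-IoU cropping: it draws a minimum-overlap option (1.0 means keep the image as-is), then retries up to trials random crops until one satisfies the aspect-ratio, area, box-center, and IoU constraints. A minimal usage sketch, run in this module's context; shapes and values are illustrative only:

    import torch
    crop = RandomIoUCrop()
    image = torch.rand(3, 300, 400)  # CHW float image
    target = {"boxes": torch.tensor([[50., 60., 200., 180.]]),
              "labels": torch.tensor([1])}
    image, target = crop(image, target)  # boxes are shifted and clamped into the crop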
class RandomZoomOut(nn.Module):
    def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5):
        super().__init__()
        if fill is None:
            fill = [0., 0., 0.]
        self.fill = fill
        self.side_range = side_range
        if side_range[0] < 1. or side_range[0] > side_range[1]:
            raise ValueError("Invalid canvas side range provided {}.".format(side_range))
        self.p = p

    @torch.jit.unused
    def _get_fill_value(self, is_pil):
        # type: (bool) -> int
        # We fake the type to make it work on JIT
        return tuple(int(x) for x in self.fill) if is_pil else 0

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        if torch.rand(1) < self.p:
            return image, target

        orig_w, orig_h = get_image_size(image)

        r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
        canvas_width = int(orig_w * r)
        canvas_height = int(orig_h * r)

        r = torch.rand(2)
        left = int((canvas_width - orig_w) * r[0])
        top = int((canvas_height - orig_h) * r[1])
        right = canvas_width - (left + orig_w)
        bottom = canvas_height - (top + orig_h)

        if torch.jit.is_scripting():
            fill = 0
        else:
            fill = self._get_fill_value(_is_pil_image(image))

        image = F.pad(image, [left, top, right, bottom], fill=fill)
        if isinstance(image, torch.Tensor):
            v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
            image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \
                image[..., :, (left + orig_w):] = v

        if target is not None:
            target["boxes"][:, 0::2] += left
            target["boxes"][:, 1::2] += top

        return image, target

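One subtlety above: for tensors, F.pad fills the new canvas with a constant (0 here), so the chained slice assignment repaints all four padded borders with self.fill, broadcast from a (C, 1, 1) tensor. A minimal sketch of the same idiom, with made-up values:

    import torch
    img = torch.zeros(3, 4, 6)                      # padded canvas
    v = torch.tensor([10., 20., 30.]).view(-1, 1, 1)
    img[..., :1, :] = img[..., :, :2] = v           # paint top border and left border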
class RandomPhotometricDistort(nn.Module):
    def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5),
                 hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5):
        super().__init__()
        self._brightness = T.ColorJitter(brightness=brightness)
        self._contrast = T.ColorJitter(contrast=contrast)
        self._hue = T.ColorJitter(hue=hue)
        self._saturation = T.ColorJitter(saturation=saturation)
        self.p = p

    def forward(self, image: Tensor,
                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        if isinstance(image, torch.Tensor):
            if image.ndimension() not in {2, 3}:
                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
            elif image.ndimension() == 2:
                image = image.unsqueeze(0)

        r = torch.rand(7)

        if r[0] < self.p:
            image = self._brightness(image)

        contrast_before = r[1] < 0.5
        if contrast_before:
            if r[2] < self.p:
                image = self._contrast(image)

        if r[3] < self.p:
            image = self._saturation(image)

        if r[4] < self.p:
            image = self._hue(image)

        if not contrast_before:
            if r[5] < self.p:
                image = self._contrast(image)

        if r[6] < self.p:
            channels = get_image_num_channels(image)
            permutation = torch.randperm(channels)

            is_pil = _is_pil_image(image)
            if is_pil:
                image = F.to_tensor(image)
            image = image[..., permutation, :, :]
            if is_pil:
                image = F.to_pil_image(image)

        return image, target

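For context, r = torch.rand(7) pre-draws one random number per potential distortion; contrast is applied either before or after the saturation/hue pair (chosen by r[1]), and r[6] gates a random channel shuffle. A minimal sketch of that shuffle step, with illustrative values:

    import torch
    image = torch.rand(3, 2, 2)
    permutation = torch.randperm(3)           # e.g. tensor([2, 0, 1])
    shuffled = image[..., permutation, :, :]  # reorders the channel axis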
import math
import torch
import torchvision
@@ -323,12 +114,6 @@ def _get_shape_onnx(image: Tensor) -> Tensor:
    return operators.shape_as_tensor(image)[-2:]


@torch.jit.unused
def _fake_cast_onnx(v: Tensor) -> float:
    # ONNX requires a tensor but here we fake its type for JIT.
    return v


def _resize_image_and_masks(image: Tensor,
                            target: Optional[Dict[str, Tensor]] = None,
                            image_size: Optional[Tuple[int, int]] = None,