From ec146da5cf54f9417016e57b40add9e27d679aee Mon Sep 17 00:00:00 2001
From: Francis Lata
Date: Tue, 22 Oct 2024 02:48:11 -0700
Subject: [PATCH] trim dataloader-related code not needed from ref

---
 test/external/mlperf_retinanet/openimages.py | 139 +-----------
 test/external/mlperf_retinanet/presets.py    |  14 --
 test/external/mlperf_retinanet/transforms.py | 215 -------------------
 3 files changed, 2 insertions(+), 366 deletions(-)

diff --git a/test/external/mlperf_retinanet/openimages.py b/test/external/mlperf_retinanet/openimages.py
index 9642824368..f2b2df36ca 100644
--- a/test/external/mlperf_retinanet/openimages.py
+++ b/test/external/mlperf_retinanet/openimages.py
@@ -18,23 +18,6 @@ from test.external.mlperf_retinanet.boxes import box_iou
 from test.external.mlperf_retinanet.utils import Matcher
 
 
-def convert_coco_poly_to_mask(segmentations, height, width):
-    masks = []
-    for polygons in segmentations:
-        rles = coco_mask.frPyObjects(polygons, height, width)
-        mask = coco_mask.decode(rles)
-        if len(mask.shape) < 3:
-            mask = mask[..., None]
-        mask = torch.as_tensor(mask, dtype=torch.uint8)
-        mask = mask.any(dim=2)
-        masks.append(mask)
-    if masks:
-        masks = torch.stack(masks, dim=0)
-    else:
-        masks = torch.zeros((0, height, width), dtype=torch.uint8)
-    return masks
-
-
 class ConvertCocoPolysToMask(object):
     def __init__(self, filter_iscrowd=True):
         self.filter_iscrowd = filter_iscrowd
@@ -86,97 +69,6 @@ class ConvertCocoPolysToMask(object):
         return image, target
 
 
-def _coco_remove_images_without_annotations(dataset, cat_list=None):
-    def _has_only_empty_bbox(anno):
-        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)
-
-    def _count_visible_keypoints(anno):
-        return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)
-
-    min_keypoints_per_image = 10
-
-    def _has_valid_annotation(anno):
-        # if it's empty, there is no annotation
-        if len(anno) == 0:
-            return False
-        # if all boxes have close to zero area, there is no annotation
-        if _has_only_empty_bbox(anno):
-            return False
-        # keypoints task have a slight different critera for considering
-        # if an annotation is valid
-        if "keypoints" not in anno[0]:
-            return True
-        # for keypoint detection tasks, only consider valid images those
-        # containing at least min_keypoints_per_image
-        if _count_visible_keypoints(anno) >= min_keypoints_per_image:
-            return True
-        return False
-
-    assert isinstance(dataset, torchvision.datasets.CocoDetection)
-    ids = []
-    for ds_idx, img_id in enumerate(dataset.ids):
-        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
-        anno = dataset.coco.loadAnns(ann_ids)
-        if cat_list:
-            anno = [obj for obj in anno if obj["category_id"] in cat_list]
-        if _has_valid_annotation(anno):
-            ids.append(ds_idx)
-
-    dataset = torch.utils.data.Subset(dataset, ids)
-    return dataset
-
-
-def convert_to_coco_api(ds):
-    coco_ds = COCO()
-    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
-    ann_id = 1
-    dataset = {'images': [], 'categories': [], 'annotations': []}
-    categories = set()
-    for img_idx in range(len(ds)):
-        # find better way to get target
-        # targets = ds.get_annotations(img_idx)
-        img, targets = ds[img_idx]
-        image_id = targets["image_id"].item()
-        img_dict = {}
-        img_dict['id'] = image_id
-        img_dict['height'] = img.shape[-2]
-        img_dict['width'] = img.shape[-1]
-        dataset['images'].append(img_dict)
-        bboxes = targets["boxes"]
-        bboxes[:, 2:] -= bboxes[:, :2]
-        bboxes = bboxes.tolist()
-        labels = targets['labels'].tolist()
-        areas = targets['area'].tolist()
-        iscrowd = targets['iscrowd'].tolist()
-        num_objs = len(bboxes)
-        for i in range(num_objs):
-            ann = {}
-            ann['image_id'] = image_id
-            ann['bbox'] = bboxes[i]
-            ann['category_id'] = labels[i]
-            categories.add(labels[i])
-            ann['area'] = areas[i]
-            ann['iscrowd'] = iscrowd[i]
-            ann['id'] = ann_id
-            dataset['annotations'].append(ann)
-            ann_id += 1
-    dataset['categories'] = [{'id': i} for i in sorted(categories)]
-    coco_ds.dataset = dataset
-    coco_ds.createIndex()
-    return coco_ds
-
-
-def get_coco_api_from_dataset(dataset):
-    for _ in range(10):
-        if isinstance(dataset, torchvision.datasets.CocoDetection):
-            break
-        if isinstance(dataset, torch.utils.data.Subset):
-            dataset = dataset.dataset
-    if isinstance(dataset, torchvision.datasets.CocoDetection):
-        return dataset.coco
-    return convert_to_coco_api(dataset)
-
-
 class CocoDetection(torchvision.datasets.CocoDetection):
     def __init__(self, img_folder, ann_file, transforms):
         super(CocoDetection, self).__init__(img_folder, ann_file)
@@ -191,31 +83,6 @@ class CocoDetection(torchvision.datasets.CocoDetection):
         return img, target
 
 
-def get_coco(name, root, image_set, transforms, mode='instances'):
-    anno_file_template = "{}_{}2017.json"
-    PATHS = {
-        "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
-        "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
-    }
-
-    t = [ConvertCocoPolysToMask(filter_iscrowd=True)]
-
-    if transforms is not None:
-        t.append(transforms)
-    transforms = T.Compose(t)
-
-    img_folder, ann_file = PATHS[image_set]
-    img_folder = os.path.join(root, img_folder)
-    ann_file = os.path.join(root, ann_file)
-
-    dataset = CocoDetection(img_folder, ann_file, transforms=transforms)
-
-    if image_set == "train":
-        dataset = _coco_remove_images_without_annotations(dataset)
-
-    return dataset
-
-
 def get_openimages(name, root, image_set, transforms):
     PATHS = {
         "train": os.path.join(root, "train"),
@@ -235,10 +102,8 @@ def get_openimages(name, root, image_set, transforms):
     return dataset
 
 
-# https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L401
-# NOTE: this applies the following filtering in https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L117
-# and https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L203 to match with
-# tinygrad's dataloader implementation
+# This applies the filtering in https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L117
+# and https://github.com/mlcommons/training/blob/cdd928d4596c142c15a7d86b2eeadbac718c8da2/single_stage_detector/ssd/model/retinanet.py#L203 to match with tinygrad's dataloader implementation.
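#
# A minimal, hypothetical usage sketch of the function below; the toy boxes and
# anchors are assumptions for illustration only, while postprocess_targets,
# Matcher, and box_iou come from this file:
#
#   import torch
#   targets = [{"boxes": torch.tensor([[10., 10., 50., 50.]]),  # one GT box, xyxy
#               "labels": torch.tensor([1])}]
#   anchors = [torch.tensor([[8., 8., 52., 52.],   # IoU with GT > 0.5 -> foreground match
#                            [0., 0., 4., 4.]])]   # IoU with GT < 0.4 -> background
#   out = postprocess_targets(targets, anchors)    # matches targets to anchors per image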
 def postprocess_targets(targets, anchors):
     proposal_matcher, matched_idxs = Matcher(0.5, 0.4, allow_low_quality_matches=True), []
     for anchors_per_image, targets_per_image in zip(anchors, targets):
diff --git a/test/external/mlperf_retinanet/presets.py b/test/external/mlperf_retinanet/presets.py
index e697b96d54..9a3aa09661 100644
--- a/test/external/mlperf_retinanet/presets.py
+++ b/test/external/mlperf_retinanet/presets.py
@@ -10,20 +10,6 @@ class DetectionPresetTrain:
                 T.RandomHorizontalFlip(p=hflip_prob),
                 T.ToTensor(),
             ])
-        elif data_augmentation == 'ssd':
-            self.transforms = T.Compose([
-                T.RandomPhotometricDistort(),
-                T.RandomZoomOut(fill=list(mean)),
-                T.RandomIoUCrop(),
-                T.RandomHorizontalFlip(p=hflip_prob),
-                T.ToTensor(),
-            ])
-        elif data_augmentation == 'ssdlite':
-            self.transforms = T.Compose([
-                T.RandomIoUCrop(),
-                T.RandomHorizontalFlip(p=hflip_prob),
-                T.ToTensor(),
-            ])
         else:
             raise ValueError(f'Unknown data augmentation policy "{data_augmentation}"')
 
diff --git a/test/external/mlperf_retinanet/transforms.py b/test/external/mlperf_retinanet/transforms.py
index e50c08d1ee..e63c26d87d 100644
--- a/test/external/mlperf_retinanet/transforms.py
+++ b/test/external/mlperf_retinanet/transforms.py
@@ -63,28 +63,6 @@ def get_image_size(img: Tensor) -> List[int]:
     return get_image_size_pil(img)
 
 
-def get_image_num_channels_tensor(img: Tensor) -> int:
-    _assert_image_tensor(img)
-    if img.ndim == 2:
-        return 1
-    elif img.ndim > 2:
-        return img.shape[-3]
-
-    raise TypeError(f"Input ndim should be 2 or more. Got {img.ndim}")
-
-@torch.jit.unused
-def get_image_num_channels_pil(img: Any) -> int:
-    if _is_pil_image(img):
-        return len(img.getbands())
-    raise TypeError("Unexpected type {}".format(type(img)))
-
-def get_image_num_channels(img: Tensor) -> int:
-    if isinstance(img, torch.Tensor):
-        return get_image_num_channels_tensor(img)
-
-    return get_image_num_channels_pil(img)
-################################################################################
-
 class Compose(object):
     def __init__(self, transforms):
         self.transforms = transforms
@@ -119,193 +97,6 @@ class ToTensor(nn.Module):
         return image, target
 
 
-class RandomIoUCrop(nn.Module):
-    def __init__(self, min_scale: float = 0.3, max_scale: float = 1.0, min_aspect_ratio: float = 0.5,
-                 max_aspect_ratio: float = 2.0, sampler_options: Optional[List[float]] = None, trials: int = 40):
-        super().__init__()
-        # Configuration similar to https://github.com/weiliu89/caffe/blob/ssd/examples/ssd/ssd_coco.py#L89-L174
-        self.min_scale = min_scale
-        self.max_scale = max_scale
-        self.min_aspect_ratio = min_aspect_ratio
-        self.max_aspect_ratio = max_aspect_ratio
-        if sampler_options is None:
-            sampler_options = [0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
-        self.options = sampler_options
-        self.trials = trials
-
-    def forward(self, image: Tensor,
-                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
-        if target is None:
-            raise ValueError("The targets can't be None for this transform.")
-
-        if isinstance(image, torch.Tensor):
-            if image.ndimension() not in {2, 3}:
-                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
-            elif image.ndimension() == 2:
-                image = image.unsqueeze(0)
-
-        orig_w, orig_h = get_image_size(image)
-
-        while True:
-            # sample an option
-            idx = int(torch.randint(low=0, high=len(self.options), size=(1,)))
-            min_jaccard_overlap = self.options[idx]
-            if min_jaccard_overlap >= 1.0:  # a value larger than 1 encodes the leave as-is option
-                return image, target
-
-            for _ in range(self.trials):
-                # check the aspect ratio limitations
-                r = self.min_scale + (self.max_scale - self.min_scale) * torch.rand(2)
-                new_w = int(orig_w * r[0])
-                new_h = int(orig_h * r[1])
-                aspect_ratio = new_w / new_h
-                if not (self.min_aspect_ratio <= aspect_ratio <= self.max_aspect_ratio):
-                    continue
-
-                # check for 0 area crops
-                r = torch.rand(2)
-                left = int((orig_w - new_w) * r[0])
-                top = int((orig_h - new_h) * r[1])
-                right = left + new_w
-                bottom = top + new_h
-                if left == right or top == bottom:
-                    continue
-
-                # check for any valid boxes with centers within the crop area
-                cx = 0.5 * (target["boxes"][:, 0] + target["boxes"][:, 2])
-                cy = 0.5 * (target["boxes"][:, 1] + target["boxes"][:, 3])
-                is_within_crop_area = (left < cx) & (cx < right) & (top < cy) & (cy < bottom)
-                if not is_within_crop_area.any():
-                    continue
-
-                # check at least 1 box with jaccard limitations
-                boxes = target["boxes"][is_within_crop_area]
-                ious = torchvision.ops.boxes.box_iou(boxes, torch.tensor([[left, top, right, bottom]],
-                                                                         dtype=boxes.dtype, device=boxes.device))
-                if ious.max() < min_jaccard_overlap:
-                    continue
-
-                # keep only valid boxes and perform cropping
-                target["boxes"] = boxes
-                target["labels"] = target["labels"][is_within_crop_area]
-                target["boxes"][:, 0::2] -= left
-                target["boxes"][:, 1::2] -= top
-                target["boxes"][:, 0::2].clamp_(min=0, max=new_w)
-                target["boxes"][:, 1::2].clamp_(min=0, max=new_h)
-                image = F.crop(image, top, left, new_h, new_w)
-
-                return image, target
-
-
-class RandomZoomOut(nn.Module):
-    def __init__(self, fill: Optional[List[float]] = None, side_range: Tuple[float, float] = (1., 4.), p: float = 0.5):
-        super().__init__()
-        if fill is None:
-            fill = [0., 0., 0.]
-        self.fill = fill
-        self.side_range = side_range
-        if side_range[0] < 1. or side_range[0] > side_range[1]:
-            raise ValueError("Invalid canvas side range provided {}.".format(side_range))
-        self.p = p
-
-    @torch.jit.unused
-    def _get_fill_value(self, is_pil):
-        # type: (bool) -> int
-        # We fake the type to make it work on JIT
-        return tuple(int(x) for x in self.fill) if is_pil else 0
-
-    def forward(self, image: Tensor,
-                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
-        if isinstance(image, torch.Tensor):
-            if image.ndimension() not in {2, 3}:
-                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
-            elif image.ndimension() == 2:
-                image = image.unsqueeze(0)
-
-        if torch.rand(1) < self.p:
-            return image, target
-
-        orig_w, orig_h = get_image_size(image)
-
-        r = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
-        canvas_width = int(orig_w * r)
-        canvas_height = int(orig_h * r)
-
-        r = torch.rand(2)
-        left = int((canvas_width - orig_w) * r[0])
-        top = int((canvas_height - orig_h) * r[1])
-        right = canvas_width - (left + orig_w)
-        bottom = canvas_height - (top + orig_h)
-
-        if torch.jit.is_scripting():
-            fill = 0
-        else:
-            fill = self._get_fill_value(_is_pil_image(image))
-
-        image = F.pad(image, [left, top, right, bottom], fill=fill)
-        if isinstance(image, torch.Tensor):
-            v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
-            image[..., :top, :] = image[..., :, :left] = image[..., (top + orig_h):, :] = \
-                image[..., :, (left + orig_w):] = v
-
-        if target is not None:
-            target["boxes"][:, 0::2] += left
-            target["boxes"][:, 1::2] += top
-
-        return image, target
-
-
-class RandomPhotometricDistort(nn.Module):
-    def __init__(self, contrast: Tuple[float] = (0.5, 1.5), saturation: Tuple[float] = (0.5, 1.5),
-                 hue: Tuple[float] = (-0.05, 0.05), brightness: Tuple[float] = (0.875, 1.125), p: float = 0.5):
-        super().__init__()
-        self._brightness = T.ColorJitter(brightness=brightness)
-        self._contrast = T.ColorJitter(contrast=contrast)
-        self._hue = T.ColorJitter(hue=hue)
-        self._saturation = T.ColorJitter(saturation=saturation)
-        self.p = p
-
-    def forward(self, image: Tensor,
-                target: Optional[Dict[str, Tensor]] = None) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
-        if isinstance(image, torch.Tensor):
-            if image.ndimension() not in {2, 3}:
-                raise ValueError('image should be 2/3 dimensional. Got {} dimensions.'.format(image.ndimension()))
-            elif image.ndimension() == 2:
-                image = image.unsqueeze(0)
-
-        r = torch.rand(7)
-
-        if r[0] < self.p:
-            image = self._brightness(image)
-
-        contrast_before = r[1] < 0.5
-        if contrast_before:
-            if r[2] < self.p:
-                image = self._contrast(image)
-
-        if r[3] < self.p:
-            image = self._saturation(image)
-
-        if r[4] < self.p:
-            image = self._hue(image)
-
-        if not contrast_before:
-            if r[5] < self.p:
-                image = self._contrast(image)
-
-        if r[6] < self.p:
-            channels = get_image_num_channels(image)
-            permutation = torch.randperm(channels)
-
-            is_pil = _is_pil_image(image)
-            if is_pil:
-                image = F.to_tensor(image)
-            image = image[..., permutation, :, :]
-            if is_pil:
-                image = F.to_pil_image(image)
-
-        return image, target
-
 import math
 import torch
 import torchvision
@@ -323,12 +114,6 @@ def _get_shape_onnx(image: Tensor) -> Tensor:
     return operators.shape_as_tensor(image)[-2:]
 
 
-@torch.jit.unused
-def _fake_cast_onnx(v: Tensor) -> float:
-    # ONNX requires a tensor but here we fake its type for JIT.
-    return v
-
-
 def _resize_image_and_masks(image: Tensor, target: Optional[Dict[str, Tensor]] = None,
                             image_size: Optional[Tuple[int, int]] = None,