2 Copyright (c) 2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
20 from ..config import BoolField
21 from ..utils import read_json, convert_bboxes_xywh_to_x1y1x2y2
22 from ..representation import DetectionAnnotation, PoseEstimationAnnotation
23 from .format_converter import BaseFormatConverter, FileBasedAnnotationConverter, FileBasedAnnotationConverterConfig
def get_image_annotation(image_id, annotations_):
    """Return the annotation records that belong to one image.

    Args:
        image_id: value compared against each record's ``'image_id'`` entry.
        annotations_: iterable of annotation dictionaries.

    Returns:
        A new list with the matching records, in their original order.
    """
    return [record for record in annotations_ if record['image_id'] == image_id]
def get_label_map(full_annotation, use_full_label_map=False, has_background=False):
    """Build the label lookup tables from the ``'categories'`` section.

    Args:
        full_annotation: dictionary holding a ``'categories'`` list whose
            entries provide ``'id'`` and ``'name'``.
        use_full_label_map: when true, the category ids are used as labels
            unchanged; otherwise labels are consecutive positions in the
            ``'categories'`` list.
        has_background: shifts the consecutive labels by one so that label 0
            stays free. Ignored when ``use_full_label_map`` is true.

    Returns:
        A ``(label_map, label_id_to_label)`` tuple: label -> category name,
        and category id -> label.
    """
    categories = full_annotation['categories']
    label_map = {}
    label_id_to_label = {}

    if use_full_label_map:
        # Identity mapping: the dataset's own category ids are the labels.
        for category in categories:
            category_id = category['id']
            label_id_to_label[category_id] = category_id
            label_map[category_id] = category['name']
        return label_map, label_id_to_label

    # Compact mapping: labels follow list order, starting at 1 when a
    # background class occupies label 0.
    first_label = 1 if has_background else 0
    for label, category in enumerate(categories, start=first_label):
        label_id_to_label[category['id']] = label
        label_map[label] = category['name']

    return label_map, label_id_to_label
class MSCocoDetectionConverterConfig(FileBasedAnnotationConverterConfig):
    """Configuration schema used by the ``mscoco_detection`` converter.

    Adds two optional boolean options on top of the file-based converter
    configuration it inherits from.
    """
    # Optional flag; the detection converter treats a missing value as False
    # and, when set, reserves label 0 for a 'background' class.
    has_background = BoolField(optional=True)
    # Optional flag; the detection converter treats a missing value as False
    # and, when set, keeps the dataset's category ids as labels.
    use_full_label_map = BoolField(optional=True)
class MSCocoDetectionConverter(BaseFormatConverter):
    """Convert an MS COCO style annotation file into ``DetectionAnnotation`` objects."""

    __provider__ = 'mscoco_detection'

    _config_validator_type = MSCocoDetectionConverterConfig

    def configure(self):
        """Read converter settings from ``self.config``.

        ``annotation_file`` is required; ``has_background`` and
        ``use_full_label_map`` fall back to ``False`` when absent.
        """
        self.annotation_file = self.config['annotation_file']
        self.has_background = self.config.get('has_background', False)
        self.use_full_label_map = self.config.get('use_full_label_map', False)

    def convert(self):
        """Create one ``DetectionAnnotation`` per entry of the ``'images'`` list.

        Returns:
            A ``(detection_annotations, meta)`` tuple. ``meta`` always holds
            ``'label_map'`` and, when ``has_background`` is set, also
            ``'background_label'`` equal to 0.
        """
        full_annotation = read_json(self.annotation_file)
        image_info = full_annotation['images']
        annotations = full_annotation['annotations']

        label_map, label_id_to_label = get_label_map(full_annotation, self.use_full_label_map, self.has_background)

        meta = {}
        if self.has_background:
            label_map[0] = 'background'
            meta['background_label'] = 0
        meta['label_map'] = label_map

        # Group the annotations by image once, instead of rescanning the whole
        # annotation list for every image. Per-image order is preserved.
        annotations_by_image = {}
        for annotation in annotations:
            annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

        detection_annotations = []
        for image in tqdm(image_info):
            identifier = image['file_name']
            # Images without any annotation still produce an (empty) entry.
            image_annotation = annotations_by_image.get(image['id'], [])

            image_labels = [label_id_to_label[annotation['category_id']] for annotation in image_annotation]
            # 'bbox' is read as [xmin, ymin, width, height].
            boxes = [annotation['bbox'] for annotation in image_annotation]
            xmins = [box[0] for box in boxes]
            ymins = [box[1] for box in boxes]
            widths = [box[2] for box in boxes]
            heights = [box[3] for box in boxes]
            xmaxs = np.add(xmins, widths)
            ymaxs = np.add(ymins, heights)
            is_crowd = [annotation['iscrowd'] for annotation in image_annotation]

            detection_annotation = DetectionAnnotation(identifier, image_labels, xmins, ymins, xmaxs, ymaxs)
            detection_annotation.metadata['iscrowd'] = is_crowd
            detection_annotations.append(detection_annotation)

        return detection_annotations, meta
class MSCocoKeypointsConverter(FileBasedAnnotationConverter):
    """Convert an MS COCO style keypoints file into ``PoseEstimationAnnotation`` objects."""

    __provider__ = 'mscoco_keypoints'

    def convert(self):
        """Create one ``PoseEstimationAnnotation`` per image that has annotations.

        Images without any annotation record are skipped.

        Returns:
            A ``(keypoints_annotations, meta)`` tuple where ``meta`` is
            ``{'label_map': label_map}`` built from the category ids as-is.
        """
        # NOTE(review): self.annotation_file is not set in this class;
        # presumably the FileBasedAnnotationConverter base provides it.
        full_annotation = read_json(self.annotation_file)
        image_info = full_annotation['images']
        annotations = full_annotation['annotations']
        label_map, _ = get_label_map(full_annotation, True)

        # Group the annotations by image once, instead of rescanning the whole
        # annotation list for every image. Per-image order is preserved.
        annotations_by_image = {}
        for annotation in annotations:
            annotations_by_image.setdefault(annotation['image_id'], []).append(annotation)

        keypoints_annotations = []
        for image in image_info:
            identifier = image['file_name']
            image_annotation = annotations_by_image.get(image['id'])
            if not image_annotation:
                continue

            x_vals, y_vals, visibility, labels, areas, is_crowd, bboxes, difficult = [], [], [], [], [], [], [], []
            for target in image_annotation:
                if target['num_keypoints'] == 0:
                    # Remember the position of targets that carry no keypoints.
                    difficult.append(len(x_vals))
                labels.append(target['category_id'])
                # 'keypoints' is a flat list read in strides of three:
                # x, y and visibility for each point.
                keypoints = target['keypoints']
                x_vals.append(keypoints[::3])
                y_vals.append(keypoints[1::3])
                visibility.append(keypoints[2::3])
                areas.append(target['area'])
                bboxes.append(convert_bboxes_xywh_to_x1y1x2y2(*target['bbox']))
                is_crowd.append(target['iscrowd'])

            keypoints_annotation = PoseEstimationAnnotation(
                identifier, np.array(x_vals), np.array(y_vals), np.array(visibility), np.array(labels)
            )
            keypoints_annotation.metadata['areas'] = areas
            keypoints_annotation.metadata['rects'] = bboxes
            keypoints_annotation.metadata['iscrowd'] = is_crowd
            keypoints_annotation.metadata['difficult_boxes'] = difficult

            keypoints_annotations.append(keypoints_annotation)

        return keypoints_annotations, {'label_map': label_map}