2 Copyright (c) 2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
19 from ..config import PathField, BoolField
20 from ..representation import DetectionAnnotation, SegmentationAnnotation
21 from ..representation.segmentation_representation import GTMaskLoader
22 from ..utils import get_path, read_txt, read_xml
23 from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
# Object categories of the PASCAL VOC detection task. The position of a name
# in this tuple determines the label id assigned by prepare_detection_labels().
_VOC_CLASSES_DETECTION = (
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor',
)

# Segmentation uses the same categories with '__background__' prepended, so
# the background sits at index 0 and every object class is shifted by one.
_VOC_CLASSES_SEGMENTATION = ('__background__',) + _VOC_CLASSES_DETECTION

# One colour triple per entry of _VOC_CLASSES_SEGMENTATION, index-aligned
# (21 entries for 21 classes). Values follow the standard PASCAL VOC palette;
# presumably channel order is RGB - confirm against the mask loader.
_SEGMENTATION_COLORS = (
    (0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0),
    (0, 0, 128), (128, 0, 128), (0, 128, 128), (128, 128, 128),
    (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0),
    (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
    (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0),
    (0, 64, 128),
)
def prepare_detection_labels(has_background=True):
    """Build the class-name -> label-id mapping for PASCAL VOC detection.

    Args:
        has_background: when true, id 0 is reserved for ``'__background__'``
            and the object classes are numbered starting from 1. When false,
            the object classes are numbered starting from 0 and no background
            entry is added.

    Returns:
        dict mapping every name in ``_VOC_CLASSES_DETECTION`` (plus
        ``'__background__'`` when requested) to its integer label id.
    """
    first_label = 1 if has_background else 0
    name_to_label = {
        name: label
        for label, name in enumerate(_VOC_CLASSES_DETECTION, start=first_label)
    }

    # Only claim id 0 for the background when it is actually reserved.
    # Otherwise '__background__' would share id 0 with the first object class
    # and one of the two names would be dropped when the mapping is inverted.
    if has_background:
        name_to_label['__background__'] = 0

    return name_to_label
def reverse_label_map(label_map):
    """Return a new dict with the keys and values of ``label_map`` swapped.

    If several keys share the same value, the key visited last while
    iterating ``label_map`` is the one kept in the result.
    """
    inverted = {}
    for original_key, original_value in label_map.items():
        inverted[original_value] = original_key
    return inverted
class PascalVOCSegmentationConverterConfig(BaseFormatConverterConfig):
    """Config fields validated for the PASCAL VOC segmentation converter."""

    # Path to the image-set file; declared without the optional flag.
    image_set_file = PathField()
    # Optional directory holding the images.
    images_dir = PathField(is_directory=True, optional=True)
    # Optional directory holding the segmentation masks.
    mask_dir = PathField(is_directory=True, optional=True)
class PascalVOCSegmentationConverter(BaseFormatConverter):
    """Convert a PASCAL VOC image set into segmentation annotations."""

    __provider__ = 'voc_segmentation'

    _config_validator_type = PascalVOCSegmentationConverterConfig

    def configure(self):
        """Read the paths from the config, falling back to sibling directories.

        When ``images_dir`` or ``mask_dir`` is not configured, the directory
        is looked up next to the parent of ``image_set_file`` as
        ``JPEGImages`` or ``SegmentationClass`` respectively.
        """
        self.image_set_file = self.config['image_set_file']

        self.image_dir = self.config.get('images_dir')
        if not self.image_dir:
            self.image_dir = get_path(self.image_set_file.parent / 'JPEGImages')

        self.mask_dir = self.config.get('mask_dir')
        if not self.mask_dir:
            self.mask_dir = get_path(self.image_set_file.parent / 'SegmentationClass')

    def convert(self):
        """Create one annotation per entry of the image-set file.

        Returns:
            tuple ``(annotations, meta)``: ``annotations`` is a list of
            ``SegmentationAnnotation`` objects whose image and mask
            identifiers are ``<dir name>/<entry>.jpg`` and
            ``<dir name>/<entry>.png``; ``meta`` holds the label map, the
            background label and the segmentation colours.
        """
        # Function-scope import so the fix does not depend on the module's
        # import block.
        from pathlib import Path

        # ``.name`` of a path is a plain ``str``; applying ``/`` to it directly
        # raises TypeError, so wrap it in ``Path`` before joining.
        image_prefix = Path(self.image_dir.name)
        mask_prefix = Path(self.mask_dir.name)

        annotations = [
            SegmentationAnnotation(
                str(image_prefix / '{}.jpg'.format(image)),
                str(mask_prefix / '{}.png'.format(image)),
                mask_loader=GTMaskLoader.SCIPY
            )
            for image in read_txt(self.image_set_file)
        ]

        meta = {
            'label_map': dict(enumerate(_VOC_CLASSES_SEGMENTATION)),
            'background_label': 0,
            'segmentation_colors': _SEGMENTATION_COLORS
        }

        return annotations, meta
class PascalVOCDetectionConverterConfig(BaseFormatConverterConfig):
    """Config fields validated for the PASCAL VOC detection converter."""

    # Path to the image-set file; declared without the optional flag.
    image_set_file = PathField()
    # Directory with the per-image XML annotation files.
    annotations_dir = PathField(is_directory=True)
    # Optional directory holding the images.
    images_dir = PathField(is_directory=True, optional=True)
    # Optional switch controlling whether label 0 is reserved for background.
    has_background = BoolField(optional=True)
class PascalVOCDetectionConverter(BaseFormatConverter):
    """Convert PASCAL VOC XML annotations into detection annotations."""

    __provider__ = 'voc07'

    _config_validator_type = PascalVOCDetectionConverterConfig

    def configure(self):
        """Read the paths and options from the config.

        ``images_dir`` falls back to ``JPEGImages`` next to the parent of
        ``image_set_file``; ``has_background`` defaults to ``True``.
        """
        self.image_set_file = self.config['image_set_file']
        self.image_dir = self.config.get('images_dir')
        if not self.image_dir:
            self.image_dir = get_path(self.image_set_file.parent / 'JPEGImages')
        self.annotations_dir = self.config['annotations_dir']
        self.has_background = self.config.get('has_background', True)

    def convert(self):
        """Parse one XML file per image-set entry into a detection annotation.

        Returns:
            tuple ``(detections, meta)``: ``detections`` is a list of
            ``DetectionAnnotation`` objects, each with the indices of its
            difficult boxes stored in ``metadata['difficult_boxes']``;
            ``meta`` holds the id -> name label map and, when a background
            class is used, ``'background_label': 0``.

        Raises:
            KeyError: if an object's ``<name>`` is not a known VOC class.
        """
        class_to_ind = prepare_detection_labels(self.has_background)

        detections = []
        for image in tqdm(read_txt(self.image_set_file, sep=None)):
            root = read_xml(self.annotations_dir / '{}.xml'.format(image))

            identifier = root.find('.//filename').text
            # Result intentionally unused; presumably get_path validates that
            # the image exists - confirm against its implementation.
            get_path(self.image_dir / identifier)

            labels = []
            difficult_indices = []
            # Coordinate lists keyed by the XML tag they are read from.
            coordinates = {'xmin': [], 'ymin': [], 'xmax': [], 'ymax': []}
            for entry in root:
                if not entry.tag.startswith('object'):
                    continue

                bbox = entry.find('bndbox')

                # A missing <difficult> element is treated as "not difficult"
                # instead of failing on ``None.text``.
                difficult_node = entry.find('difficult')
                if difficult_node is not None and int(difficult_node.text):
                    difficult_indices.append(len(labels))

                labels.append(class_to_ind[entry.find('name').text])
                # Every coordinate is shifted by -1; presumably converting
                # 1-based VOC pixel coordinates to 0-based - confirm.
                for tag, values in coordinates.items():
                    values.append(float(bbox.find(tag).text) - 1)

            image_annotation = DetectionAnnotation(
                identifier, labels,
                coordinates['xmin'], coordinates['ymin'],
                coordinates['xmax'], coordinates['ymax']
            )
            image_annotation.metadata['difficult_boxes'] = difficult_indices

            detections.append(image_annotation)

        meta = {'label_map': reverse_label_map(class_to_ind)}
        if self.has_background:
            meta['background_label'] = 0

        return detections, meta