2 Copyright (c) 2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
18 from pathlib import Path
20 from ..config import PathField, BoolField
21 from ..representation import DetectionAnnotation, SegmentationAnnotation
22 from ..representation.segmentation_representation import GTMaskLoader
23 from ..utils import get_path, read_txt, read_xml
24 from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
# The 20 object categories of the PASCAL VOC challenge, in alphabetical order.
# A category's position in this tuple determines its numeric label id.
_VOC_CLASSES_DETECTION = (
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat', 'chair',
    'cow', 'diningtable', 'dog', 'horse',
    'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor',
)

# Segmentation uses the same categories with a background class at index 0.
_VOC_CLASSES_SEGMENTATION = ('__background__',) + _VOC_CLASSES_DETECTION

# RGB colour of every segmentation class, indexed by class id (standard PASCAL
# VOC label colormap). There must be exactly one colour per entry of
# _VOC_CLASSES_SEGMENTATION, i.e. 21 colours; the last one belongs to 'tvmonitor'.
_SEGMENTATION_COLORS = (
    (0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0),
    (0, 0, 128), (128, 0, 128), (0, 128, 128), (128, 128, 128),
    (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0),
    (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
    (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0),
    (0, 64, 128),
)
def prepare_detection_labels(has_background=True):
    """Build the class-name -> label-id mapping for PASCAL VOC detection.

    Args:
        has_background: when true, label id 0 is reserved for
            ``'__background__'`` and the object classes are numbered from 1.
            When false, the object classes are numbered from 0 and no
            background entry is added.

    Returns:
        dict mapping every class name to its integer label id.
    """
    first_label = 1 if has_background else 0
    label_ids = {
        class_name: label
        for label, class_name in enumerate(_VOC_CLASSES_DETECTION, start=first_label)
    }

    # Reserve id 0 for the background only when it was requested. Without a
    # background class id 0 already belongs to the first object class, and a
    # second name on the same id would displace it once the map is reversed.
    if has_background:
        label_ids['__background__'] = 0

    return label_ids
def reverse_label_map(label_map):
    """Return a new dict with the keys and values of *label_map* swapped.

    If several keys share one value, the key that comes last in iteration
    order is the one kept for that value.
    """
    swapped = {}
    for name, label in label_map.items():
        swapped[label] = name
    return swapped
class PascalVOCSegmentationConverterConfig(BaseFormatConverterConfig):
    """Configuration fields accepted by the PASCAL VOC segmentation converter."""

    # List file naming the images to convert; the converter reads it unconditionally.
    image_set_file = PathField()
    # Directory holding the source images; optional.
    images_dir = PathField(is_directory=True, optional=True)
    # Directory holding the ground-truth masks; optional.
    mask_dir = PathField(is_directory=True, optional=True)
class PascalVOCSegmentationConverter(BaseFormatConverter):
    """Convert a PASCAL VOC segmentation image set into ``SegmentationAnnotation`` objects."""

    __provider__ = 'voc_segmentation'

    _config_validator_type = PascalVOCSegmentationConverterConfig

    # NOTE(review): the method names ``configure`` / ``convert`` are assumed to
    # be the hooks expected by BaseFormatConverter -- confirm against
    # format_converter.py.
    def configure(self):
        """Read the converter settings from ``self.config``.

        ``images_dir`` and ``mask_dir`` are optional. When one is not set, it
        falls back to the ``JPEGImages`` / ``SegmentationClass`` directory
        located relative to ``image_set_file``.
        """
        self.image_set_file = self.config['image_set_file']

        # NOTE(review): negative indices into ``Path.parents`` are supported on
        # Python 3.10+ only (IndexError before that). ``parents[-2]`` is the
        # ancestor directly below the path's top-most parent, not a fixed
        # number of levels above the list file -- confirm this is the intended
        # dataset root.
        self.image_dir = self.config.get('images_dir')
        if not self.image_dir:
            self.image_dir = get_path(self.image_set_file.parents[-2] / 'JPEGImages', is_directory=True)

        # Apply the default only when no mask directory was configured, so an
        # explicit ``mask_dir`` is never overwritten.
        self.mask_dir = self.config.get('mask_dir')
        if not self.mask_dir:
            self.mask_dir = get_path(self.image_set_file.parents[-2] / 'SegmentationClass', is_directory=True)

    def convert(self):
        """Create one annotation per entry of the image-set file.

        Returns:
            ``(annotations, meta)`` where ``annotations`` is a list of
            ``SegmentationAnnotation`` and ``meta`` holds the label map, the
            background label id and the per-class mask colours.
        """
        # Only the last component of each directory is kept, so identifiers
        # have the form '<images dir name>/<image>.jpg' and
        # '<mask dir name>/<image>.png'.
        images_prefix = Path(self.image_dir.name)
        masks_prefix = Path(self.mask_dir.name)

        annotations = [
            SegmentationAnnotation(
                str(images_prefix / '{}.jpg'.format(image)),
                str(masks_prefix / '{}.png'.format(image)),
                mask_loader=GTMaskLoader.SCIPY
            )
            for image in read_txt(self.image_set_file)
        ]

        meta = {
            'label_map': dict(enumerate(_VOC_CLASSES_SEGMENTATION)),
            'background_label': 0,
            'segmentation_colors': _SEGMENTATION_COLORS
        }

        return annotations, meta
class PascalVOCDetectionConverterConfig(BaseFormatConverterConfig):
    """Configuration fields accepted by the PASCAL VOC detection converter."""

    # List file naming the images to convert; the converter reads it unconditionally.
    image_set_file = PathField()
    # Directory holding one '<image>.xml' annotation file per listed image.
    annotations_dir = PathField(is_directory=True)
    # Directory holding the source images; optional.
    images_dir = PathField(is_directory=True, optional=True)
    # Whether label id 0 is reserved for the background class; optional,
    # the converter assumes True when the key is absent.
    has_background = BoolField(optional=True)
class PascalVOCDetectionConverter(BaseFormatConverter):
    """Convert PASCAL VOC XML detection annotations into ``DetectionAnnotation`` objects."""

    __provider__ = 'voc07'

    _config_validator_type = PascalVOCDetectionConverterConfig

    # NOTE(review): the method names ``configure`` / ``convert`` are assumed to
    # be the hooks expected by BaseFormatConverter -- confirm against
    # format_converter.py.
    def configure(self):
        """Read the converter settings from ``self.config``.

        ``images_dir`` is optional and falls back to the ``JPEGImages``
        directory located relative to ``image_set_file``. ``has_background``
        defaults to ``True``.
        """
        self.image_set_file = self.config['image_set_file']

        # NOTE(review): negative indices into ``Path.parents`` are supported on
        # Python 3.10+ only (IndexError before that) -- confirm that
        # ``parents[-2]`` is the intended dataset root.
        self.image_dir = self.config.get('images_dir')
        if not self.image_dir:
            # The fallback is a directory, so it is checked as one, the same
            # way the segmentation converter checks its default directories.
            self.image_dir = get_path(self.image_set_file.parents[-2] / 'JPEGImages', is_directory=True)

        self.annotations_dir = self.config['annotations_dir']
        self.has_background = self.config.get('has_background', True)

    def convert(self):
        """Parse one XML annotation file per entry of the image-set file.

        Returns:
            ``(detections, meta)`` where ``detections`` is a list of
            ``DetectionAnnotation`` (one per image) and ``meta`` holds the
            id -> class-name label map, plus ``background_label`` when the
            background class is enabled.

        Raises:
            KeyError: if an object's class name is not a known VOC class.
        """
        class_to_ind = prepare_detection_labels(self.has_background)

        detections = []
        for image in tqdm(read_txt(self.image_set_file, sep=None)):
            root = read_xml(self.annotations_dir / '{}.xml'.format(image))

            identifier = root.find('.//filename').text
            # Called only for its check of the image path; the result is unused.
            # (Presumably get_path raises when the file is missing -- confirm.)
            get_path(self.image_dir / identifier)

            labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
            difficult_indices = []
            for entry in root:
                if not entry.tag.startswith('object'):
                    continue

                bbox = entry.find('bndbox')

                # Record the box position only for objects flagged as
                # difficult; a missing <difficult> tag counts as not difficult.
                difficult_node = entry.find('difficult')
                if difficult_node is not None and int(difficult_node.text):
                    difficult_indices.append(len(labels))

                labels.append(class_to_ind[entry.find('name').text])
                # PASCAL VOC pixel coordinates are 1-based; shift them to 0-based.
                x_mins.append(float(bbox.find('xmin').text) - 1)
                y_mins.append(float(bbox.find('ymin').text) - 1)
                x_maxs.append(float(bbox.find('xmax').text) - 1)
                y_maxs.append(float(bbox.find('ymax').text) - 1)

            image_annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
            image_annotation.metadata['difficult_boxes'] = difficult_indices

            detections.append(image_annotation)

        meta = {'label_map': reverse_label_map(class_to_ind)}
        if self.has_background:
            meta['background_label'] = 0

        return detections, meta