tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py

   1 """
   2 Copyright (c) 2019 Intel Corporation
   3
   4 Licensed under the Apache License, Version 2.0 (the "License");
   5 you may not use this file except in compliance with the License.
   6 You may obtain a copy of the License at
   7
   8       http://www.apache.org/licenses/LICENSE-2.0
   9
  10 Unless required by applicable law or agreed to in writing, software
  11 distributed under the License is distributed on an "AS IS" BASIS,
  12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 See the License for the specific language governing permissions and
  14 limitations under the License.
  15 """
  16
from pathlib import Path

from tqdm import tqdm

from ..config import PathField, BoolField
from ..representation import DetectionAnnotation, SegmentationAnnotation
from ..representation.segmentation_representation import GTMaskLoader
from ..utils import get_path, read_txt, read_xml
from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
  24
  25 _VOC_CLASSES_DETECTION = (
  26     'aeroplane', 'bicycle', 'bird', 'boat',
  27     'bottle', 'bus', 'car', 'cat', 'chair',
  28     'cow', 'diningtable', 'dog', 'horse',
  29     'motorbike', 'person', 'pottedplant',
  30     'sheep', 'sofa', 'train', 'tvmonitor'
  31 )
  32
  33 _VOC_CLASSES_SEGMENTATION = tuple(['__background__']) + _VOC_CLASSES_DETECTION
  34 _SEGMENTATION_COLORS = ((
  35     (0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0),
  36     (0, 0, 128), (128, 0, 128), (0, 128, 128), (128, 128, 128),
  37     (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0),
  38     (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
  39     (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0),
  40     (0, 64, 128)
  41 ))
  42
  43
  44 def prepare_detection_labels(has_background=True):
  45     num_classes = len(_VOC_CLASSES_DETECTION)
  46     labels_shift = 1 if has_background else 0
  47     reversed_label_map = dict(zip(_VOC_CLASSES_DETECTION, list(range(labels_shift, num_classes + labels_shift))))
  48     if has_background:
  49         reversed_label_map['__background__'] = 0
  50
  51     return reversed_label_map
  52
  53
  54 def reverse_label_map(label_map):
  55     return {value: key for key, value in label_map.items()}
  56
  57
class PascalVOCSegmentationConverterConfig(BaseFormatConverterConfig):
    """Configuration fields for PascalVOCSegmentationConverter."""
    # Path to the image-set list file; the only field not marked optional.
    image_set_file = PathField()
    # Optional image directory; when unset the converter falls back to
    # 'JPEGImages' next to the image-set file.
    images_dir = PathField(optional=True, is_directory=True)
    # Optional mask directory; when unset the converter falls back to
    # 'SegmentationClass' next to the image-set file.
    mask_dir = PathField(optional=True, is_directory=True)
  62
  63
  64 class PascalVOCSegmentationConverter(BaseFormatConverter):
  65     __provider__ = 'voc_segmentation'
  66
  67     _config_validator_type = PascalVOCSegmentationConverterConfig
  68
  69     def configure(self):
  70         self.image_set_file = self.config['image_set_file']
  71         self.image_dir = self.config.get('images_dir')
  72         if not self.image_dir:
  73             self.image_dir = get_path(self.image_set_file.parent / 'JPEGImages')
  74
  75         self.mask_dir = self.config.get('mask_dir')
  76         if not self.mask_dir:
  77             self.mask_dir = get_path(self.image_set_file.parent / 'SegmentationClass')
  78
  79     def convert(self):
  80
  81         annotations = []
  82         for image in read_txt(self.image_set_file):
  83             annotation = SegmentationAnnotation(
  84                 str(self.image_dir.name / '{}.jpg'.format(image)),
  85                 str(self.mask_dir.name / '{}.png'.format(image)),
  86                 mask_loader=GTMaskLoader.SCIPY
  87             )
  88
  89             annotations.append(annotation)
  90
  91         meta = {
  92             'label_map': dict(enumerate(_VOC_CLASSES_SEGMENTATION)),
  93             'background_label': 0,
  94             'segmentation_colors': _SEGMENTATION_COLORS
  95         }
  96
  97         return annotations, meta
  98
  99
class PascalVOCDetectionConverterConfig(BaseFormatConverterConfig):
    """Configuration fields for PascalVOCDetectionConverter."""
    # Path to the image-set list file.
    image_set_file = PathField()
    # Directory holding one '<image>.xml' annotation file per image-set entry.
    annotations_dir = PathField(is_directory=True)
    # Optional image directory; when unset the converter falls back to
    # 'JPEGImages' next to the image-set file.
    images_dir = PathField(optional=True, is_directory=True)
    # Optional flag; the converter treats it as True when it is not provided.
    has_background = BoolField(optional=True)
 105
 106
 107 class PascalVOCDetectionConverter(BaseFormatConverter):
 108     __provider__ = 'voc07'
 109
 110     _config_validator_type = PascalVOCDetectionConverterConfig
 111
 112     def configure(self):
 113         self.image_set_file = self.config['image_set_file']
 114         self.image_dir = self.config.get('images_dir')
 115         if not self.image_dir:
 116             self.image_dir = get_path(self.image_set_file.parent / 'JPEGImages')
 117         self.annotations_dir = self.config['annotations_dir']
 118         self.has_background = self.config.get('has_background', True)
 119
 120     def convert(self):
 121         class_to_ind = prepare_detection_labels(self.has_background)
 122
 123         detections = []
 124         for image in tqdm(read_txt(self.image_set_file, sep=None)):
 125             root = read_xml(self.annotations_dir / '{}.xml'.format(image))
 126
 127             identifier = root.find('.//filename').text
 128             get_path(self.image_dir / identifier)
 129
 130             labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
 131             difficult_indices = []
 132             for entry in root:
 133                 if not entry.tag.startswith('object'):
 134                     continue
 135
 136                 bbox = entry.find('bndbox')
 137                 difficult = int(entry.find('difficult').text)
 138
 139                 if difficult == 1:
 140                     difficult_indices.append(len(labels))
 141
 142                 labels.append(class_to_ind[entry.find('name').text])
 143                 x_mins.append(float(bbox.find('xmin').text) - 1)
 144                 y_mins.append(float(bbox.find('ymin').text) - 1)
 145                 x_maxs.append(float(bbox.find('xmax').text) - 1)
 146                 y_maxs.append(float(bbox.find('ymax').text) - 1)
 147
 148             image_annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
 149             image_annotation.metadata['difficult_boxes'] = difficult_indices
 150
 151             detections.append(image_annotation)
 152
 153         meta = {'label_map': reverse_label_map(class_to_ind)}
 154         if self.has_background:
 155             meta['background_label'] = 0
 156
 157         return detections, meta