tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py

   1 """
   2 Copyright (c) 2019 Intel Corporation
   3
   4 Licensed under the Apache License, Version 2.0 (the "License");
   5 you may not use this file except in compliance with the License.
   6 You may obtain a copy of the License at
   7
   8       http://www.apache.org/licenses/LICENSE-2.0
   9
  10 Unless required by applicable law or agreed to in writing, software
  11 distributed under the License is distributed on an "AS IS" BASIS,
  12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 See the License for the specific language governing permissions and
  14 limitations under the License.
  15 """
  16
  17 from tqdm import tqdm
  18 from pathlib import Path
  19
  20 from ..config import PathField, BoolField
  21 from ..representation import DetectionAnnotation, SegmentationAnnotation
  22 from ..representation.segmentation_representation import GTMaskLoader
  23 from ..utils import get_path, read_txt, read_xml
  24 from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
  25
  26 _VOC_CLASSES_DETECTION = (
  27     'aeroplane', 'bicycle', 'bird', 'boat',
  28     'bottle', 'bus', 'car', 'cat', 'chair',
  29     'cow', 'diningtable', 'dog', 'horse',
  30     'motorbike', 'person', 'pottedplant',
  31     'sheep', 'sofa', 'train', 'tvmonitor'
  32 )
  33
  34 _VOC_CLASSES_SEGMENTATION = tuple(['__background__']) + _VOC_CLASSES_DETECTION
  35 _SEGMENTATION_COLORS = ((
  36     (0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0),
  37     (0, 0, 128), (128, 0, 128), (0, 128, 128), (128, 128, 128),
  38     (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0),
  39     (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
  40     (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0),
  41     (0, 64, 128)
  42 ))
  43
  44
  45 def prepare_detection_labels(has_background=True):
  46     num_classes = len(_VOC_CLASSES_DETECTION)
  47     labels_shift = 1 if has_background else 0
  48     reversed_label_map = dict(zip(_VOC_CLASSES_DETECTION, list(range(labels_shift, num_classes + labels_shift))))
  49     if has_background:
  50         reversed_label_map['__background__'] = 0
  51
  52     return reversed_label_map
  53
  54
  55 def reverse_label_map(label_map):
  56     return {value: key for key, value in label_map.items()}
  57
  58
class PascalVOCSegmentationConverterConfig(BaseFormatConverterConfig):
    """Configuration fields validated for the 'voc_segmentation' converter."""

    # Text file listing the image names to convert; the converter reads it
    # with read_txt and uses each entry as a file stem.
    image_set_file = PathField()
    # Optional directory holding the '<name>.jpg' images. When it is not set
    # the converter falls back to a 'JPEGImages' directory derived from the
    # location of image_set_file.
    images_dir = PathField(optional=True, is_directory=True)
    # Optional directory holding the '<name>.png' masks. When it is not set
    # the converter falls back to a 'SegmentationClass' directory derived from
    # the location of image_set_file.
    mask_dir = PathField(optional=True, is_directory=True)
  63
  64
  65 class PascalVOCSegmentationConverter(BaseFormatConverter):
  66     __provider__ = 'voc_segmentation'
  67
  68     _config_validator_type = PascalVOCSegmentationConverterConfig
  69
  70     def configure(self):
  71         self.image_set_file = self.config['image_set_file']
  72         self.image_dir = self.config.get('images_dir')
  73         if not self.image_dir:
  74             self.image_dir = get_path(self.image_set_file.parents[-2] / 'JPEGImages', is_directory=True)
  75
  76         self.mask_dir = self.config.get('mask_dir')
  77         if not self.mask_dir:
  78             self.mask_dir = get_path(self.image_set_file.parents[-2] / 'SegmentationClass', is_directory=True)
  79
  80     def convert(self):
  81
  82         annotations = []
  83         for image in read_txt(self.image_set_file):
  84             annotation = SegmentationAnnotation(
  85                 str(Path(self.image_dir.name) / '{}.jpg'.format(image)),
  86                 str(Path(self.mask_dir.name) / '{}.png'.format(image)),
  87                 mask_loader=GTMaskLoader.SCIPY
  88             )
  89
  90             annotations.append(annotation)
  91
  92         meta = {
  93             'label_map': dict(enumerate(_VOC_CLASSES_SEGMENTATION)),
  94             'background_label': 0,
  95             'segmentation_colors': _SEGMENTATION_COLORS
  96         }
  97
  98         return annotations, meta
  99
 100
class PascalVOCDetectionConverterConfig(BaseFormatConverterConfig):
    """Configuration fields validated for the 'voc07' detection converter."""

    # Text file listing the image names to convert; each entry is used as the
    # stem of an '<name>.xml' annotation file.
    image_set_file = PathField()
    # Directory containing the per-image XML annotation files.
    annotations_dir = PathField(is_directory=True)
    # Optional directory with the images. When it is not set the converter
    # falls back to a 'JPEGImages' directory derived from the location of
    # image_set_file.
    images_dir = PathField(optional=True, is_directory=True)
    # Optional flag; the converter treats a missing value as True. When true,
    # class indices start at 1 and index 0 is the '__background__' label.
    has_background = BoolField(optional=True)
 106
 107
 108 class PascalVOCDetectionConverter(BaseFormatConverter):
 109     __provider__ = 'voc07'
 110
 111     _config_validator_type = PascalVOCDetectionConverterConfig
 112
 113     def configure(self):
 114         self.image_set_file = self.config['image_set_file']
 115         self.image_dir = self.config.get('images_dir')
 116         if not self.image_dir:
 117             self.image_dir = get_path(self.image_set_file.parents[-2] / 'JPEGImages')
 118         self.annotations_dir = self.config['annotations_dir']
 119         self.has_background = self.config.get('has_background', True)
 120
 121     def convert(self):
 122         class_to_ind = prepare_detection_labels(self.has_background)
 123
 124         detections = []
 125         for image in tqdm(read_txt(self.image_set_file, sep=None)):
 126             root = read_xml(self.annotations_dir / '{}.xml'.format(image))
 127
 128             identifier = root.find('.//filename').text
 129             get_path(self.image_dir / identifier)
 130
 131             labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
 132             difficult_indices = []
 133             for entry in root:
 134                 if not entry.tag.startswith('object'):
 135                     continue
 136
 137                 bbox = entry.find('bndbox')
 138                 difficult = int(entry.find('difficult').text)
 139
 140                 if difficult == 1:
 141                     difficult_indices.append(len(labels))
 142
 143                 labels.append(class_to_ind[entry.find('name').text])
 144                 x_mins.append(float(bbox.find('xmin').text) - 1)
 145                 y_mins.append(float(bbox.find('ymin').text) - 1)
 146                 x_maxs.append(float(bbox.find('xmax').text) - 1)
 147                 y_maxs.append(float(bbox.find('ymax').text) - 1)
 148
 149             image_annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
 150             image_annotation.metadata['difficult_boxes'] = difficult_indices
 151
 152             detections.append(image_annotation)
 153
 154         meta = {'label_map': reverse_label_map(class_to_ind)}
 155         if self.has_background:
 156             meta['background_label'] = 0
 157
 158         return detections, meta