"""
Copyright (c) 2019 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import bisect
import enum
import warnings
from typing import List

import numpy as np

from ..utils import finalize_metric_result
from .overlap import Overlap, IOA
from ..config import BoolField, NumberField, StringField
from ..representation import DetectionAnnotation, DetectionPrediction
from .metric import BaseMetricConfig, FullDatasetEvaluationMetric


class APIntegralType(enum.Enum):
    voc_11_point = '11point'
    voc_max = 'max'


class BaseDetectionMetricConfig(BaseMetricConfig):
    overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
    ignore_difficult = BoolField(optional=True)
    include_boundaries = BoolField(optional=True)
    distinct_conf = BoolField(optional=True)
    allow_multiple_matches_per_ignored = BoolField(optional=True)
    overlap_method = StringField(optional=True, choices=Overlap.providers)
    use_filtered_tp = BoolField(optional=True)
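
# A minimal sketch of a metric entry in an accuracy-checker configuration that
# exercises these fields (the YAML layout and the 'map' provider name are
# assumptions; field names mirror the validator above and the defaults in
# configure() below):
#
#   metrics:
#     - type: map
#       overlap_threshold: 0.5
#       ignore_difficult: True
#       overlap_method: iou
#       integral: 11point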


class BaseDetectionMetricMixin:
    def configure(self):
        self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
        self.ignore_difficult = self.config.get('ignore_difficult', True)
        self.include_boundaries = self.config.get('include_boundaries', True)
        self.distinct_conf = self.config.get('distinct_conf', False)
        self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
        self.overlap_method = Overlap.provide(self.config.get('overlap_method', 'iou'), self.include_boundaries)
        self.use_filtered_tp = self.config.get('use_filtered_tp', False)

        label_map = self.config.get('label_map', 'label_map')
        labels = self.dataset.metadata.get(label_map, {})
        self.labels = labels.keys()
        valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
        self.meta['names'] = [labels[name] for name in valid_labels]
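
    # Worked example for configure (illustrative values): with dataset metadata
    # label_map = {0: 'background', 1: 'person', 2: 'car'} and
    # background_label = 0, valid_labels is [1, 2] and self.meta['names']
    # becomes ['person', 'car'].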

    def per_class_detection_statistics(self, annotations, predictions, labels):
        labels_stat = {}
        for label in labels:
            tp, fp, conf, n = bbox_match(
                annotations, predictions, int(label),
                self.overlap_method, self.overlap_threshold,
                self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
                self.use_filtered_tp
            )

            if not tp.size:
                labels_stat[label] = {
                    'precision': np.array([]),
                    'recall': np.array([]),
                    'thresholds': conf,
                    'fppi': np.array([])
                }
                continue

            # select only values for distinct confidences
            if self.distinct_conf:
                distinct_value_indices = np.where(np.diff(conf))[0]
                threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
            else:
                threshold_indexes = np.arange(conf.size)

            tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]

            labels_stat[label] = {
                'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
                'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
                'thresholds': conf[threshold_indexes],
                'fppi': fp / len(annotations)
            }

        return labels_stat
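
# Worked example for per_class_detection_statistics (illustrative values, not
# executed): with detections sorted by descending confidence, tp = [1, 0, 1],
# fp = [0, 1, 0] and n = 2 ground-truth boxes, the cumulative sums are
# tp_cum = [1, 1, 2] and fp_cum = [0, 1, 1], so precision = [1/1, 1/2, 2/3]
# and recall = [1/2, 1/2, 2/2].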


class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating mAP metric of detection models.
    """

    __provider__ = 'map'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _MAPConfigValidator(BaseDetectionMetricConfig):
            integral = StringField(choices=[e.value for e in APIntegralType], optional=True)

        map_config_validator = _MAPConfigValidator(
            self.__provider__, on_extra_argument=_MAPConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        map_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        average_precisions = []
        for label in labels_stat:
            label_precision = labels_stat[label]['precision']
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                ap = average_precision(label_precision, label_recall, self.integral)
                average_precisions.append(ap)
            else:
                average_precisions.append(np.nan)

        average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
        if not average_precisions:
            warnings.warn("No detections to compute mAP")
            average_precisions.append(0)

        return average_precisions


class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating Miss Rate metric of detection models.
    """

    __provider__ = 'miss_rate'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _MRConfigValidator(BaseDetectionMetricConfig):
            fppi_level = NumberField(min_value=0, max_value=1)

        mr_config_validator = _MRConfigValidator(
            self.__provider__, on_extra_argument=_MRConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        mr_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.fppi_level = self.config.get('fppi_level')

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        miss_rates = []
        for label in labels_stat:
            label_miss_rate = 1.0 - labels_stat[label]['recall']
            label_fppi = labels_stat[label]['fppi']

            position = bisect.bisect_left(label_fppi, self.fppi_level)
            m0 = max(0, position - 1)
            m1 = position if position < len(label_miss_rate) else m0
            miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))

        return miss_rates
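
# Worked example for MissRate.evaluate (illustrative values, not executed):
# with fppi = [0.1, 0.5, 1.0], miss_rate = [0.8, 0.4, 0.2] and
# fppi_level = 0.5, bisect_left returns position 1, so m0 = 0 and m1 = 1, and
# the reported value averages the two samples around the requested FPPI level:
# 0.5 * (0.8 + 0.4) = 0.6.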


class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating recall metric of detection models.
    """

    __provider__ = 'recall'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        recall_config_validator = BaseDetectionMetricConfig(
            self.__provider__, on_extra_argument=BaseDetectionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
        )
        recall_config_validator.validate(self.config)

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        recalls = []
        for label in labels_stat:
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                max_recall = label_recall[-1]
                recalls.append(max_recall)
            else:
                recalls.append(np.nan)

        recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
        if not recalls:
            warnings.warn("No detections to compute recall")
            recalls.append(0)

        return recalls


class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    __provider__ = 'detection_accuracy'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _DAConfigValidator(BaseDetectionMetricConfig):
            use_normalization = BoolField(optional=True)

        da_config_validator = _DAConfigValidator(
            self.__provider__, on_extra_argument=_DAConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        da_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.use_normalization = self.config.get('use_normalization', False)

    def evaluate(self, annotations, predictions):
        all_matches, _, _ = match_detections_class_agnostic(
            predictions, annotations, self.overlap_threshold, self.overlap_method
        )
        cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
        if self.use_normalization:
            return np.mean(normalize_confusion_matrix(cm).diagonal())

        return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))


def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
    out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
    for gt, prediction in zip(gt_data, predicted_data):
        for match_pair in all_matched_ids[gt.identifier]:
            gt_label = int(gt.labels[match_pair[0]])
            pred_label = int(prediction.labels[match_pair[1]])
            out_cm[gt_label, pred_label] += 1

    return out_cm


def normalize_confusion_matrix(cm):
    row_sums = np.maximum(1, np.sum(cm, axis=1, keepdims=True)).astype(np.float32)
    return cm.astype(np.float32) / row_sums
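
# Worked example for the two accuracy flavours (illustrative values): for
# cm = [[3, 1], [1, 1]] the raw accuracy is (3 + 1) / 6 ~= 0.667, while the
# normalized variant first divides each row by its sum ([4, 2]) and then
# averages the diagonal: (3/4 + 1/2) / 2 = 0.625.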


def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
    all_matches = {}
    total_gt_bbox_num = 0
    matched_gt_bbox_num = 0

    for gt, prediction in zip(gt_data, predicted_data):
        gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
        predicted_bboxes = np.stack(
            (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
        )

        total_gt_bbox_num += len(gt_bboxes)

        similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)

        matches = []
        while len(matches) < len(gt_bboxes):
            best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
            best_match_value = similarity_matrix[best_match_pos]

            if best_match_value <= min_iou:
                break

            gt_id = best_match_pos[0]
            predicted_id = best_match_pos[1]

            # greedily consume the matched row and column so each box is used once
            similarity_matrix[gt_id, :] = 0.0
            similarity_matrix[:, predicted_id] = 0.0

            matches.append((gt_id, predicted_id))
            matched_gt_bbox_num += 1

        all_matches[gt.identifier] = matches

    return all_matches, total_gt_bbox_num, matched_gt_bbox_num
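
# Worked example of the greedy matching above (illustrative values): for
# similarity_matrix = [[0.9, 0.3], [0.4, 0.8]] and min_iou = 0.5, the global
# maximum 0.9 pairs gt 0 with prediction 0; zeroing row 0 and column 0 leaves
# 0.8 as the next maximum, pairing gt 1 with prediction 1, after which every
# ground-truth box is matched and the loop ends.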


def calculate_similarity_matrix(set_a, set_b, overlap):
    similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
    for i, box_a in enumerate(set_a):
        for j, box_b in enumerate(set_b):
            similarity[i, j] = overlap(box_a, box_b)

    return similarity


def average_precision(precision, recall, integral):
    if integral == APIntegralType.voc_11_point:
        result = 0.
        for point in np.arange(0., 1.1, 0.1):
            accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
            result = result + accumulator / 11.

        return result

    if integral != APIntegralType.voc_max:
        raise NotImplementedError("Integral type not implemented")

    # first append sentinel values at the end
    recall = np.concatenate(([0.], recall, [1.]))
    precision = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(precision.size - 1, 0, -1):
        precision[i - 1] = np.maximum(precision[i - 1], precision[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    change_point = np.where(recall[1:] != recall[:-1])[0]
    # and sum (\Delta recall) * precision
    return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])
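
# Worked example for the voc_max branch (illustrative values): for
# precision = [1.0, 0.5, 0.67] and recall = [0.5, 0.5, 1.0], the sentinel
# arrays are recall = [0, 0.5, 0.5, 1, 1] and precision = [0, 1, 0.5, 0.67, 0];
# the envelope turns precision into [1, 1, 0.67, 0.67, 0], recall changes at
# indices 0 and 2, and AP = 0.5 * 1 + 0.5 * 0.67 = 0.835.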


def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
               include_boundaries=True, use_filtered_tp=False):
    """
    Args:
        annotation: ground truth bounding boxes.
        prediction: predicted bounding boxes.
        label: class for which bounding boxes are matched.
        overlap_evaluator: evaluator of overlap.
        overlap_thresh: bounding box IoU threshold.
        ignore_difficult: ignore bounding boxes marked as difficult (see Pascal VOC).
        allow_multiple_matches_per_ignored: allow multiple matches per ignored ground truth box.
        include_boundaries: if True, box width and height are computed as max - min + 1.
        use_filtered_tp: if True, matches to ignored objects are still counted as true positives.
    Returns:
        tp: tp[i] == 1 if the detection with the i-th highest score is a true positive.
        fp: fp[i] == 1 if the detection with the i-th highest score is a false positive.
        thresholds: array of confidence thresholds.
        number_ground_truth: number of ground truth boxes taken into account
            (difficult boxes are excluded when ignore_difficult is set).
    """
    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
        annotation, ignore_difficult, label
    )
    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
        label, prediction, ignore_difficult
    )

    tp = np.zeros_like(prediction_images)
    fp = np.zeros_like(prediction_images)

    for image in range(prediction_images.shape[0]):
        gt_img = annotation[prediction_images[image]]
        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
        used = used_boxes[gt_img.identifier]

        idx = gt_img.labels == label
        if not np.array(idx).any():
            # no ground truth boxes of this class in the image
            fp[image] = 1
            continue

        prediction_box = prediction_boxes[image][1:]
        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]

        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
        if ignore_difficult and allow_multiple_matches_per_ignored:
            ioa = IOA(include_boundaries)
            ignored = np.where(annotation_difficult == 1)[0]
            ignored_annotation_boxes = (
                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
            )
            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)

        max_overlap = -np.inf

        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
        if not_ignored_overlaps.size:
            max_overlap = np.max(not_ignored_overlaps)

        if max_overlap < overlap_thresh and ignored_overlaps.size:
            max_overlap = np.max(ignored_overlaps)
        max_overlapped = np.where(overlaps == max_overlap)[0]

        def set_false_positive(box_index):
            is_box_difficult = difficult_boxes_prediction[box_index].any()
            return int(not ignore_difficult or not is_box_difficult)

        if max_overlap < overlap_thresh:
            fp[image] = set_false_positive(image)
            continue

        if not annotation_difficult[max_overlapped].any():
            if not used[max_overlapped].any():
                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
                    tp[image] = 1
                    used[max_overlapped] = True
            else:
                fp[image] = set_false_positive(image)
        elif not allow_multiple_matches_per_ignored:
            if used[max_overlapped].any():
                fp[image] = set_false_positive(image)
            used[max_overlapped] = True

    return tp, fp, prediction_boxes[:, 0], number_ground_truth
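
# Minimal usage sketch (hypothetical gt/pred objects; real DetectionAnnotation
# and DetectionPrediction instances normally come from the dataset and the
# launcher):
#
#   overlap_evaluator = Overlap.provide('iou', True)  # include_boundaries=True
#   tp, fp, scores, n_gt = bbox_match([gt], [pred], 1, overlap_evaluator)
#
# tp and fp are aligned with detections sorted by descending score, so
# np.cumsum(tp) / max(n_gt, 1) traces the recall curve used by the metrics
# above.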


def _prepare_annotation_boxes(annotation, ignore_difficult, label):
    used_boxes = {}
    num_ground_truth = 0
    difficult_boxes = {}

    for ground_truth in annotation:
        idx_for_label = ground_truth.labels == label
        filtered_label = ground_truth.labels[idx_for_label]
        used_ = np.zeros_like(filtered_label)
        used_boxes[ground_truth.identifier] = used_
        num_ground_truth += used_.shape[0]

        difficult_box_mask = np.full_like(ground_truth.labels, False)
        difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True
        difficult_box_mask = difficult_box_mask[idx_for_label]

        difficult_boxes[ground_truth.identifier] = difficult_box_mask
        if ignore_difficult:
            num_ground_truth -= np.sum(difficult_box_mask)

    return used_boxes, num_ground_truth, difficult_boxes


def _prepare_prediction_boxes(label, predictions, ignore_difficult):
    prediction_images = []
    prediction_boxes = []
    indexes = []
    difficult_boxes = []

    for i, prediction in enumerate(predictions):
        idx = prediction.labels == label

        prediction_images.append(np.full(prediction.labels[idx].shape, i))
        prediction_boxes.append(np.c_[
            prediction.scores[idx],
            prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
        ])

        difficult_box_mask = np.full_like(prediction.labels, False)
        difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True
        # keep the mask aligned with the label-filtered boxes
        difficult_box_mask = difficult_box_mask[idx]

        difficult_boxes.append(difficult_box_mask)
        indexes.append(np.argwhere(idx))

    prediction_boxes = np.concatenate(prediction_boxes)
    difficult_boxes = np.concatenate(difficult_boxes)
    sorted_order = np.argsort(-prediction_boxes[:, 0])
    prediction_boxes = prediction_boxes[sorted_order]
    prediction_images = np.concatenate(prediction_images)[sorted_order]
    difficult_boxes = difficult_boxes[sorted_order]

    return prediction_boxes, prediction_images, difficult_boxes
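
# Shape note (illustrative): each row of the returned prediction_boxes is
# [score, x_min, y_min, x_max, y_max], sorted by descending score;
# prediction_images[i] is the index of the image that produced row i, and
# difficult_boxes[i] flags rows whose box is listed under "difficult_boxes"
# in the prediction metadata.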


def get_valid_labels(labels, background):
    return list(filter(lambda label: label != background, labels))