tools/accuracy_checker/accuracy_checker/metrics/detection.py

   1 """
   2 Copyright (c) 2019 Intel Corporation
   3
   4 Licensed under the Apache License, Version 2.0 (the "License");
   5 you may not use this file except in compliance with the License.
   6 You may obtain a copy of the License at
   7
   8       http://www.apache.org/licenses/LICENSE-2.0
   9
  10 Unless required by applicable law or agreed to in writing, software
  11 distributed under the License is distributed on an "AS IS" BASIS,
  12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 See the License for the specific language governing permissions and
  14 limitations under the License.
  15 """
  16
  17 import bisect
  18 import enum
  19 import warnings
  20 from typing import List
  21
  22 import numpy as np
  23
  24 from ..utils import finalize_metric_result
  25 from .overlap import Overlap, IOA
  26 from ..config import BoolField, NumberField, StringField
  27 from ..representation import DetectionAnnotation, DetectionPrediction
  28 from .metric import BaseMetricConfig, FullDatasetEvaluationMetric
  29
  30
class APIntegralType(enum.Enum):
    """
    Ways of turning a precision-recall curve into an average precision value (see average_precision).
    """
    # mean of the best precision reachable at 11 recall levels taken from 0 to 1 with step 0.1
    voc_11_point = '11point'
    # area under the monotonically decreasing precision envelope of the curve
    voc_max = 'max'
  34
  35
class BaseDetectionMetricConfig(BaseMetricConfig):
    """
    Config fields shared by the detection metrics of this module; every field is declared optional.
    """
    # overlap value (within [0, 1]) a prediction has to reach to be matched with a ground truth box
    overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
    # whether boxes listed in the "difficult_boxes" metadata are excluded from the evaluation
    ignore_difficult = BoolField(optional=True)
    # forwarded to the overlap evaluators (Overlap.provide / IOA)
    include_boundaries = BoolField(optional=True)
    # whether statistics are reported only at distinct confidence values
    distinct_conf = BoolField(optional=True)
    # whether an ignored ground truth box may absorb several predictions (see bbox_match)
    allow_multiple_matches_per_ignored = BoolField(optional=True)
    # name of the overlap evaluator, restricted to the registered Overlap providers
    overlap_method = StringField(optional=True, choices=Overlap.providers)
    # whether predictions flagged as difficult may still be counted as true positives (see bbox_match)
    use_filtered_tp = BoolField(optional=True)
  44
  45
class MAPConfigValidator(BaseDetectionMetricConfig):
    """
    Config of the 'map' metric: the shared detection fields plus the integration type.
    """
    # one of the APIntegralType values ('11point' or 'max'); optional
    integral = StringField(choices=[e.value for e in APIntegralType], optional=True)
  48
  49
class MRConfigValidator(BaseDetectionMetricConfig):
    """
    Config of the 'miss_rate' metric: the shared detection fields plus the FPPI level.
    """
    # false-positives-per-image level (within [0, 1]) at which MissRate reads the miss rate;
    # unlike the other fields it is not declared with optional=True
    fppi_level = NumberField(min_value=0, max_value=1)
  52
  53
class DAConfigValidator(BaseDetectionMetricConfig):
    """
    Config of the 'detection_accuracy' metric: the shared detection fields plus the normalization switch.
    """
    # when enabled, DetectionAccuracyMetric averages the diagonal of the row-normalized confusion matrix
    use_normalization = BoolField(optional=True)
  56
  57
  58 class BaseDetectionMetricMixin:
  59     def configure(self):
  60         self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
  61         self.ignore_difficult = self.config.get('ignore_difficult', True)
  62         self.include_boundaries = self.config.get('include_boundaries', True)
  63         self.distinct_conf = self.config.get('distinct_conf', False)
  64         self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
  65         self.overlap_method = Overlap.provide(self.config.get('overlap', 'iou'), self.include_boundaries)
  66         self.use_filtered_tp = self.config.get('use_filtered_tp', False)
  67
  68         label_map = self.config.get('label_map', 'label_map')
  69         labels = self.dataset.metadata.get(label_map, {})
  70         self.labels = labels.keys()
  71         valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
  72         self.meta['names'] = [labels[name] for name in valid_labels]
  73
  74     def per_class_detection_statistics(self, annotations, predictions, labels):
  75         labels_stat = {}
  76         for label in labels:
  77             tp, fp, conf, n = bbox_match(
  78                 annotations, predictions, int(label),
  79                 self.overlap_method, self.overlap_threshold,
  80                 self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
  81                 self.use_filtered_tp
  82             )
  83
  84             if not tp.size:
  85                 labels_stat[label] = {
  86                     'precision': np.array([]),
  87                     'recall': np.array([]),
  88                     'thresholds': conf,
  89                     'fppi': np.array([])
  90                 }
  91                 continue
  92
  93             # select only values for distinct confidences
  94             if self.distinct_conf:
  95                 distinct_value_indices = np.where(np.diff(conf))[0]
  96                 threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
  97             else:
  98                 threshold_indexes = np.arange(conf.size)
  99
 100             tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]
 101
 102             labels_stat[label] = {
 103                 'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
 104                 'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
 105                 'thresholds': conf[threshold_indexes],
 106                 'fppi': fp / len(annotations)
 107             }
 108
 109         return labels_stat
 110
 111
 112 class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
 113     """
 114     Class for evaluating mAP metric of detection models.
 115     """
 116
 117     __provider__ = 'map'
 118
 119     annotation_types = (DetectionAnnotation, )
 120     prediction_types = (DetectionPrediction, )
 121
 122     _config_validator_type = MAPConfigValidator
 123
 124     def configure(self):
 125         super().configure()
 126         self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))
 127
 128     def evaluate(self, annotations, predictions):
 129         valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
 130         labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
 131
 132         average_precisions = []
 133         for label in labels_stat:
 134             label_precision = labels_stat[label]['precision']
 135             label_recall = labels_stat[label]['recall']
 136             if label_recall.size:
 137                 ap = average_precision(label_precision, label_recall, self.integral)
 138                 average_precisions.append(ap)
 139             else:
 140                 average_precisions.append(np.nan)
 141
 142         average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
 143         if not average_precisions:
 144             warnings.warn("No detections to compute mAP")
 145             average_precisions.append(0)
 146
 147         return average_precisions
 148
 149
 150 class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
 151     """
 152     Class for evaluating Miss Rate metric of detection models.
 153     """
 154
 155     __provider__ = 'miss_rate'
 156
 157     annotation_types = (DetectionAnnotation, )
 158     prediction_types = (DetectionPrediction, )
 159
 160     _config_validator_type = MRConfigValidator
 161
 162     def configure(self):
 163         super().configure()
 164         self.fppi_level = self.config.get('fppi_level')
 165
 166     def evaluate(self, annotations, predictions):
 167         valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
 168         labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
 169
 170         miss_rates = []
 171         for label in labels_stat:
 172             label_miss_rate = 1.0 - labels_stat[label]['recall']
 173             label_fppi = labels_stat[label]['fppi']
 174
 175             position = bisect.bisect_left(label_fppi, self.fppi_level)
 176             m0 = max(0, position - 1)
 177             m1 = position if position < len(label_miss_rate) else m0
 178             miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))
 179
 180         return miss_rates
 181
 182
 183 class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
 184     """
 185     Class for evaluating recall metric of detection models.
 186     """
 187
 188     __provider__ = 'recall'
 189
 190     annotation_types = (DetectionAnnotation, )
 191     prediction_types = (DetectionPrediction, )
 192
 193     _config_validator_type = BaseDetectionMetricConfig
 194
 195     def evaluate(self, annotations, predictions):
 196         valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
 197         labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
 198
 199         recalls = []
 200         for label in labels_stat:
 201             label_recall = labels_stat[label]['recall']
 202             if label_recall.size:
 203                 max_recall = label_recall[-1]
 204                 recalls.append(max_recall)
 205             else:
 206                 recalls.append(np.nan)
 207
 208         recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
 209         if not recalls:
 210             warnings.warn("No detections to compute mAP")
 211             recalls.append(0)
 212
 213         return recalls
 214
 215
 216 class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
 217     __provider__ = 'detection_accuracy'
 218
 219     annotation_types = (DetectionAnnotation, )
 220     prediction_types = (DetectionPrediction, )
 221     _config_validator_type = DAConfigValidator
 222
 223     def configure(self):
 224         super().configure()
 225         self.use_normalization = self.config.get('use_normalization', False)
 226
 227     def evaluate(self, annotations, predictions):
 228         all_matches, _, _ = match_detections_class_agnostic(
 229             predictions, annotations, self.overlap_threshold, self.overlap_method
 230         )
 231         cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
 232         if self.use_normalization:
 233             return np.mean(normalize_confusion_matrix(cm).diagonal())
 234
 235         return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))
 236
 237
 238 def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
 239     out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
 240     for gt, prediction in zip(gt_data, predicted_data):
 241         for match_pair in all_matched_ids[gt.identifier]:
 242             gt_label = int(gt.labels[match_pair[0]])
 243             pred_label = int(prediction.labels[match_pair[1]])
 244             out_cm[gt_label, pred_label] += 1
 245
 246     return out_cm
 247
 248
 249 def normalize_confusion_matrix(cm):
 250     row_sums = np.maximum(1, np.sum(cm, axis=1, keepdims=True)).astype(np.float32)
 251     return cm.astype(np.float32) / row_sums
 252
 253
 254 def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
 255     all_matches = {}
 256     total_gt_bbox_num = 0
 257     matched_gt_bbox_num = 0
 258
 259     for gt, prediction in zip(gt_data, predicted_data):
 260         gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
 261         predicted_bboxes = np.stack(
 262             (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
 263         )
 264
 265         total_gt_bbox_num += len(gt_bboxes)
 266
 267         similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)
 268
 269         matches = []
 270         for _ in gt_bboxes:
 271             best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
 272             best_match_value = similarity_matrix[best_match_pos]
 273
 274             if best_match_value <= min_iou:
 275                 break
 276
 277             gt_id = best_match_pos[0]
 278             predicted_id = best_match_pos[1]
 279
 280             similarity_matrix[gt_id, :] = 0.0
 281             similarity_matrix[:, predicted_id] = 0.0
 282
 283             matches.append((gt_id, predicted_id))
 284             matched_gt_bbox_num += 1
 285
 286         all_matches[gt.identifier] = matches
 287
 288     return all_matches, total_gt_bbox_num, matched_gt_bbox_num
 289
 290
 291 def calculate_similarity_matrix(set_a, set_b, overlap):
 292     similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
 293     for i, box_a in enumerate(set_a):
 294         for j, box_b in enumerate(set_b):
 295             similarity[i, j] = overlap(box_a, box_b)
 296
 297     return similarity
 298
 299
 300 def average_precision(precision, recall, integral):
 301     if integral == APIntegralType.voc_11_point:
 302         result = 0.
 303         for point in np.arange(0., 1.1, 0.1):
 304             accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
 305             result = result + accumulator / 11.
 306
 307         return result
 308
 309     if integral != APIntegralType.voc_max:
 310         raise NotImplementedError("Integral type not implemented")
 311
 312     # first append sentinel values at the end
 313     recall = np.concatenate(([0.], recall, [1.]))
 314     precision = np.concatenate(([0.], precision, [0.]))
 315
 316     # compute the precision envelope
 317     for i in range(precision.size - 1, 0, -1):
 318         precision[i - 1] = np.maximum(precision[i - 1], precision[i])
 319
 320     # to calculate area under PR curve, look for points
 321     # where X axis (recall) changes value
 322     change_point = np.where(recall[1:] != recall[:-1])[0]
 323     # and sum (\Delta recall) * recall
 324     return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])
 325
 326
def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
               include_boundaries=True, use_filtered_tp=False):
    """
    Match predicted boxes of one class with ground truth boxes, visiting predictions by descending score.

    Args:
        annotation: ground truth bounding boxes.
        prediction: predicted bounding boxes.
        label: class for which bounding boxes are matched.
        overlap_evaluator: evaluator of overlap.
        overlap_thresh: bounding box IoU threshold.
        ignore_difficult: ignores difficult bounding boxes (see Pascal VOC).
        allow_multiple_matches_per_ignored: allows multiple matches per ignored.
        include_boundaries: if is True then width and height of box is calculated by max - min + 1.
        use_filtered_tp: if is True then predictions flagged as difficult can still be counted as true positives.
    Returns:
        tp: tp[i] == 1 if detection with i-th highest score is true positive.
        fp: fp[i] == 1 if detection with i-th highest score is false positive.
        thresholds: array of confidence thresholds (prediction scores in descending order).
        number_ground_truth: number of ground truth boxes of the class, without difficult ones if they are ignored.
    """

    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
        annotation, ignore_difficult, label
    )
    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
        label, prediction, ignore_difficult
    )

    tp = np.zeros_like(prediction_images)
    fp = np.zeros_like(prediction_images)

    # despite its name, `image` is the index of a prediction in the score-sorted list
    for image in range(prediction_images.shape[0]):
        # prediction_images holds the position of the source image, which also indexes the annotations
        gt_img = annotation[prediction_images[image]]
        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
        used = used_boxes[gt_img.identifier]

        idx = gt_img.labels == label
        if not np.array(idx).any():
            # the image has no ground truth of this class, so the detection is a false positive
            fp[image] = 1
            continue

        # column 0 is the score, the remaining columns are box coordinates
        prediction_box = prediction_boxes[image][1:]
        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]

        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
        if ignore_difficult and allow_multiple_matches_per_ignored:
            # overlaps with ignored ground truth boxes are re-evaluated with the IOA evaluator
            ioa = IOA(include_boundaries)
            ignored = np.where(annotation_difficult == 1)[0]
            ignored_annotation_boxes = (
                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
            )
            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)

        max_overlap = -np.inf

        # regular ground truth boxes take priority; ignored ones are considered only
        # when no regular box reaches the threshold
        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
        if not_ignored_overlaps.size:
            max_overlap = np.max(not_ignored_overlaps)

        if max_overlap < overlap_thresh and ignored_overlaps.size:
            max_overlap = np.max(ignored_overlaps)
        # indices of all ground truth boxes whose overlap equals the selected maximum
        max_overlapped = np.where(overlaps == max_overlap)[0]

        def set_false_positive(box_index):
            # a difficult prediction is not counted as false positive when difficult boxes are ignored
            is_box_difficult = difficult_boxes_prediction[box_index].any()
            return int(not ignore_difficult or not is_box_difficult)

        if max_overlap < overlap_thresh:
            fp[image] = set_false_positive(image)
            continue

        if not annotation_difficult[max_overlapped].any():
            if not used[max_overlapped].any():
                # first match of a regular ground truth box: true positive
                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
                    tp[image] = 1
                    used[max_overlapped] = True
            else:
                # the ground truth box was already matched by a detection with a higher score
                fp[image] = set_false_positive(image)
        elif not allow_multiple_matches_per_ignored:
            # an ignored ground truth box absorbs one detection; further ones are false positives
            if used[max_overlapped].any():
                fp[image] = set_false_positive(image)
            used[max_overlapped] = True

    return tp, fp, prediction_boxes[:, 0], number_ground_truth
 413
 414
 415 def _prepare_annotation_boxes(annotation, ignore_difficult, label):
 416     used_boxes = {}
 417     difficult_boxes = {}
 418     num_ground_truth = 0
 419
 420     for ground_truth in annotation:
 421         idx_for_label = ground_truth.labels == label
 422         filtered_label = ground_truth.labels[idx_for_label]
 423         used_ = np.zeros_like(filtered_label)
 424         used_boxes[ground_truth.identifier] = used_
 425         num_ground_truth += used_.shape[0]
 426
 427         difficult_box_mask = np.full_like(ground_truth.labels, False)
 428         difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
 429         if ignore_difficult:
 430             difficult_box_mask[difficult_box_indices] = True
 431         difficult_box_mask = difficult_box_mask[idx_for_label]
 432
 433         difficult_boxes[ground_truth.identifier] = difficult_box_mask
 434         if ignore_difficult:
 435             num_ground_truth -= np.sum(difficult_box_mask)
 436
 437     return used_boxes, num_ground_truth, difficult_boxes
 438
 439
 440 def _prepare_prediction_boxes(label, predictions, ignore_difficult):
 441     prediction_images = []
 442     prediction_boxes = []
 443     indexes = []
 444     difficult_boxes = []
 445     for i, prediction in enumerate(predictions):
 446         idx = prediction.labels == label
 447
 448         prediction_images.append(np.full(prediction.labels[idx].shape, i))
 449         prediction_boxes.append(np.c_[
 450             prediction.scores[idx],
 451             prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
 452         ])
 453
 454         difficult_box_mask = np.full_like(prediction.labels, False)
 455         difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
 456         if ignore_difficult:
 457             difficult_box_mask[difficult_box_indices] = True
 458
 459         difficult_boxes.append(difficult_box_mask)
 460         indexes.append(np.argwhere(idx))
 461
 462     prediction_boxes = np.concatenate(prediction_boxes)
 463     difficult_boxes = np.concatenate(difficult_boxes)
 464     sorted_order = np.argsort(-prediction_boxes[:, 0])
 465     prediction_boxes = prediction_boxes[sorted_order]
 466     prediction_images = np.concatenate(prediction_images)[sorted_order]
 467     difficult_boxes = difficult_boxes[sorted_order]
 468
 469     return prediction_boxes, prediction_images, difficult_boxes
 470
 471
 472 def get_valid_labels(labels, background):
 473     return list(filter(lambda label: label != background, labels))