1 """
2 Copyright (c) 2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 """

import bisect
import enum
import warnings
from typing import List

import numpy as np

from ..utils import finalize_metric_result
from .overlap import Overlap, IOA
from ..config import BoolField, NumberField, StringField
from ..representation import DetectionAnnotation, DetectionPrediction
from .metric import BaseMetricConfig, FullDatasetEvaluationMetric


class APIntegralType(enum.Enum):
    voc_11_point = '11point'
    voc_max = 'max'


class BaseDetectionMetricConfig(BaseMetricConfig):
    overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
    ignore_difficult = BoolField(optional=True)
    include_boundaries = BoolField(optional=True)
    distinct_conf = BoolField(optional=True)
    allow_multiple_matches_per_ignored = BoolField(optional=True)
    overlap_method = StringField(optional=True, choices=Overlap.providers)
    use_filtered_tp = BoolField(optional=True)


class MAPConfigValidator(BaseDetectionMetricConfig):
    integral = StringField(choices=[e.value for e in APIntegralType], optional=True)
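
# The fields above correspond to options of the 'map' metric in an
# accuracy_checker YAML configuration. A hypothetical metric entry using them
# might look like this (values are illustrative only):
#
#     metrics:
#       - type: map
#         integral: 11point
#         overlap_threshold: 0.5
#         ignore_difficult: True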


class MRConfigValidator(BaseDetectionMetricConfig):
    fppi_level = NumberField(min_value=0, max_value=1)


class DAConfigValidator(BaseDetectionMetricConfig):
    use_normalization = BoolField(optional=True)


class BaseDetectionMetricMixin:
    def configure(self):
        self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
        self.ignore_difficult = self.config.get('ignore_difficult', True)
        self.include_boundaries = self.config.get('include_boundaries', True)
        self.distinct_conf = self.config.get('distinct_conf', False)
        self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
        self.overlap_method = Overlap.provide(self.config.get('overlap_method', 'iou'), self.include_boundaries)
        self.use_filtered_tp = self.config.get('use_filtered_tp', False)

        label_map = self.config.get('label_map', 'label_map')
        labels = self.dataset.metadata.get(label_map, {})
        self.labels = labels.keys()
        valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
        self.meta['names'] = [labels[label_id] for label_id in valid_labels]

    def per_class_detection_statistics(self, annotations, predictions, labels):
        labels_stat = {}
        for label in labels:
            tp, fp, conf, n = bbox_match(
                annotations, predictions, int(label),
                self.overlap_method, self.overlap_threshold,
                self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
                self.use_filtered_tp
            )

            if not tp.size:
                labels_stat[label] = {
                    'precision': np.array([]),
                    'recall': np.array([]),
                    'thresholds': conf,
                    'fppi': np.array([])
                }
                continue

            # select only values for distinct confidences
            if self.distinct_conf:
                distinct_value_indices = np.where(np.diff(conf))[0]
                threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
            else:
                threshold_indexes = np.arange(conf.size)

            tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]

            labels_stat[label] = {
                'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
                'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
                'thresholds': conf[threshold_indexes],
                'fppi': fp / len(annotations)
            }

        return labels_stat

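# Illustrative walk-through with hypothetical bbox_match output for one label
# over two images: tp = [1, 1, 0], fp = [0, 0, 1], conf = [0.9, 0.8, 0.7],
# n = 3. The cumulative sums are tp = [1, 2, 2] and fp = [0, 0, 1], giving
#     precision = [1.0, 1.0, 2/3]   # tp / (tp + fp)
#     recall    = [1/3, 2/3, 2/3]   # tp / n
#     fppi      = [0.0, 0.0, 0.5]   # fp / number of images (2 here)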

class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating mAP metric of detection models.
    """

    __provider__ = 'map'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    _config_validator_type = MAPConfigValidator

    def configure(self):
        super().configure()
        self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        average_precisions = []
        for label in labels_stat:
            label_precision = labels_stat[label]['precision']
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                ap = average_precision(label_precision, label_recall, self.integral)
                average_precisions.append(ap)
            else:
                average_precisions.append(np.nan)

        average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
        if not average_precisions:
            warnings.warn("No detections to compute mAP")
            average_precisions.append(0)

        return average_precisions


class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating Miss Rate metric of detection models.
    """

    __provider__ = 'miss_rate'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    _config_validator_type = MRConfigValidator

    def configure(self):
        super().configure()
        self.fppi_level = self.config.get('fppi_level')

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        miss_rates = []
        for label in labels_stat:
            label_miss_rate = 1.0 - labels_stat[label]['recall']
            label_fppi = labels_stat[label]['fppi']

            position = bisect.bisect_left(label_fppi, self.fppi_level)
            m0 = max(0, position - 1)
            m1 = position if position < len(label_miss_rate) else m0
            miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))

        return miss_rates

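# Illustrative FPPI lookup with hypothetical values: label_fppi = [0.0, 0.0, 0.5],
# label_miss_rate = [2/3, 1/3, 1/3] and fppi_level = 0.1. bisect_left returns
# position 2, so m0 = 1 and m1 = 2, and the reported miss rate is
# 0.5 * (1/3 + 1/3) = 1/3, the average of the two curve samples around the
# requested FPPI level.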

class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating recall metric of detection models.
    """

    __provider__ = 'recall'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    _config_validator_type = BaseDetectionMetricConfig

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        recalls = []
        for label in labels_stat:
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                max_recall = label_recall[-1]
                recalls.append(max_recall)
            else:
                recalls.append(np.nan)

        recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
        if not recalls:
            warnings.warn("No detections to compute recall")
            recalls.append(0)

        return recalls


class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    __provider__ = 'detection_accuracy'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )
    _config_validator_type = DAConfigValidator

    def configure(self):
        super().configure()
        self.use_normalization = self.config.get('use_normalization', False)

    def evaluate(self, annotations, predictions):
        all_matches, _, _ = match_detections_class_agnostic(
            predictions, annotations, self.overlap_threshold, self.overlap_method
        )
        cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
        if self.use_normalization:
            return np.mean(normalize_confusion_matrix(cm).diagonal())

        return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))


def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
    out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
    for gt, prediction in zip(gt_data, predicted_data):
        for match_pair in all_matched_ids[gt.identifier]:
            gt_label = int(gt.labels[match_pair[0]])
            pred_label = int(prediction.labels[match_pair[1]])
            out_cm[gt_label, pred_label] += 1

    return out_cm


def normalize_confusion_matrix(cm):
    row_sums = np.maximum(1, np.sum(cm, axis=1, keepdims=True)).astype(np.float32)
    return cm.astype(np.float32) / row_sums

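# Illustrative comparison of the two accuracy modes in DetectionAccuracyMetric,
# using a hypothetical 2x2 confusion matrix (rows: ground truth, columns:
# predictions):
# >>> cm = np.array([[18, 2], [4, 6]])
# >>> np.mean(normalize_confusion_matrix(cm).diagonal())  # (0.9 + 0.6) / 2 = 0.75
# >>> float(np.sum(cm.diagonal())) / np.sum(cm)           # 24 / 30 = 0.8
# With use_normalization every class is weighted equally, regardless of how
# many ground truth boxes it has.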

def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
    all_matches = {}
    total_gt_bbox_num = 0
    matched_gt_bbox_num = 0

    for gt, prediction in zip(gt_data, predicted_data):
        gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
        predicted_bboxes = np.stack(
            (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
        )

        total_gt_bbox_num += len(gt_bboxes)

        similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)

        matches = []
        for _ in gt_bboxes:
            best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
            best_match_value = similarity_matrix[best_match_pos]

            if best_match_value <= min_iou:
                break

            gt_id = best_match_pos[0]
            predicted_id = best_match_pos[1]

            similarity_matrix[gt_id, :] = 0.0
            similarity_matrix[:, predicted_id] = 0.0

            matches.append((gt_id, predicted_id))
            matched_gt_bbox_num += 1

        all_matches[gt.identifier] = matches

    return all_matches, total_gt_bbox_num, matched_gt_bbox_num

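# Illustrative run of the greedy matching above on a hypothetical similarity
# matrix for two ground truth boxes (rows) and two predictions (columns), with
# min_iou = 0.5:
#     [[0.8, 0.6],
#      [0.7, 0.9]]
# The best remaining pair (gt 1, pred 1) with value 0.9 is matched first and
# its row and column are zeroed out; the next best pair (gt 0, pred 0) with
# value 0.8 is matched second. The loop stops as soon as the best remaining
# value drops to min_iou or below.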

def calculate_similarity_matrix(set_a, set_b, overlap):
    similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
    for i, box_a in enumerate(set_a):
        for j, box_b in enumerate(set_b):
            similarity[i, j] = overlap(box_a, box_b)

    return similarity


def average_precision(precision, recall, integral):
    if integral == APIntegralType.voc_11_point:
        result = 0.
        for point in np.arange(0., 1.1, 0.1):
            accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
            result = result + accumulator / 11.

        return result

    if integral != APIntegralType.voc_max:
        raise NotImplementedError("Integral type not implemented")

    # first append sentinel values at the end
    recall = np.concatenate(([0.], recall, [1.]))
    precision = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(precision.size - 1, 0, -1):
        precision[i - 1] = np.maximum(precision[i - 1], precision[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    change_point = np.where(recall[1:] != recall[:-1])[0]
    # and sum (\Delta recall) * precision
    return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])

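# Illustrative values for average_precision, assuming a two-point PR curve
# precision = [1.0, 0.5], recall = [0.5, 1.0]:
# >>> average_precision(np.array([1.0, 0.5]), np.array([0.5, 1.0]), APIntegralType.voc_max)
# 0.75
# With APIntegralType.voc_11_point, the six sample points 0.0..0.5 contribute
# precision 1.0 and the five points 0.6..1.0 contribute 0.5, giving
# (6 * 1.0 + 5 * 0.5) / 11 ~= 0.77.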

def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
               include_boundaries=True, use_filtered_tp=False):
    """
    Args:
        annotation: ground truth bounding boxes.
        prediction: predicted bounding boxes.
        label: class for which bounding boxes are matched.
        overlap_evaluator: evaluator of overlap.
        overlap_thresh: bounding box IoU threshold.
        ignore_difficult: ignores difficult bounding boxes (see Pascal VOC).
        allow_multiple_matches_per_ignored: allows multiple matches per ignored box.
        include_boundaries: if True, box width and height are computed as max - min + 1.
        use_filtered_tp: if True, ignored objects are counted during evaluation.
    Returns:
        tp: tp[i] == 1 if the detection with the i-th highest score is a true positive.
        fp: fp[i] == 1 if the detection with the i-th highest score is a false positive.
        thresholds: array of confidence thresholds sorted in descending order.
        number_ground_truth: number of ground truth boxes taken into account.
    """

    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
        annotation, ignore_difficult, label
    )
    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
        label, prediction, ignore_difficult
    )

    tp = np.zeros_like(prediction_images)
    fp = np.zeros_like(prediction_images)

    for image in range(prediction_images.shape[0]):
        gt_img = annotation[prediction_images[image]]
        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
        used = used_boxes[gt_img.identifier]

        idx = gt_img.labels == label
        if not np.array(idx).any():
            fp[image] = 1
            continue

        prediction_box = prediction_boxes[image][1:]
        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]

        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
        if ignore_difficult and allow_multiple_matches_per_ignored:
            ioa = IOA(include_boundaries)
            ignored = np.where(annotation_difficult == 1)[0]
            ignored_annotation_boxes = (
                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
            )
            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)

        max_overlap = -np.inf

        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
        if not_ignored_overlaps.size:
            max_overlap = np.max(not_ignored_overlaps)

        if max_overlap < overlap_thresh and ignored_overlaps.size:
            max_overlap = np.max(ignored_overlaps)
        max_overlapped = np.where(overlaps == max_overlap)[0]

        def set_false_positive(box_index):
            is_box_difficult = difficult_boxes_prediction[box_index].any()
            return int(not ignore_difficult or not is_box_difficult)

        if max_overlap < overlap_thresh:
            fp[image] = set_false_positive(image)
            continue

        if not annotation_difficult[max_overlapped].any():
            if not used[max_overlapped].any():
                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
                    tp[image] = 1
                    used[max_overlapped] = True
            else:
                fp[image] = set_false_positive(image)
        elif not allow_multiple_matches_per_ignored:
            if used[max_overlapped].any():
                fp[image] = set_false_positive(image)
            used[max_overlapped] = True

    return tp, fp, prediction_boxes[:, 0], number_ground_truth

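# Illustrative reading of bbox_match outputs with hypothetical values: for
# thresholds = [0.9, 0.8, 0.7] (detections sorted by score), tp = [1, 0, 1]
# marks the first and third detections as true positives, fp = [0, 1, 0] marks
# the second as a false positive, and with number_ground_truth = 4 the
# achievable recall is capped at np.cumsum(tp)[-1] / 4 == 0.5.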

def _prepare_annotation_boxes(annotation, ignore_difficult, label):
    used_boxes = {}
    difficult_boxes = {}
    num_ground_truth = 0

    for ground_truth in annotation:
        idx_for_label = ground_truth.labels == label
        filtered_label = ground_truth.labels[idx_for_label]
        used_ = np.zeros_like(filtered_label)
        used_boxes[ground_truth.identifier] = used_
        num_ground_truth += used_.shape[0]

        difficult_box_mask = np.full_like(ground_truth.labels, False)
        difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True
        difficult_box_mask = difficult_box_mask[idx_for_label]

        difficult_boxes[ground_truth.identifier] = difficult_box_mask
        if ignore_difficult:
            num_ground_truth -= np.sum(difficult_box_mask)

    return used_boxes, num_ground_truth, difficult_boxes


def _prepare_prediction_boxes(label, predictions, ignore_difficult):
    prediction_images = []
    prediction_boxes = []
    indexes = []
    difficult_boxes = []
    for i, prediction in enumerate(predictions):
        idx = prediction.labels == label

        prediction_images.append(np.full(prediction.labels[idx].shape, i))
        prediction_boxes.append(np.c_[
            prediction.scores[idx],
            prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
        ])

        difficult_box_mask = np.full_like(prediction.labels, False)
        difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True

        difficult_boxes.append(difficult_box_mask)
        indexes.append(np.argwhere(idx))

    prediction_boxes = np.concatenate(prediction_boxes)
    difficult_boxes = np.concatenate(difficult_boxes)
    sorted_order = np.argsort(-prediction_boxes[:, 0])
    prediction_boxes = prediction_boxes[sorted_order]
    prediction_images = np.concatenate(prediction_images)[sorted_order]
    difficult_boxes = difficult_boxes[sorted_order]

    return prediction_boxes, prediction_images, difficult_boxes


def get_valid_labels(labels, background):
    return list(filter(lambda label: label != background, labels))