tools/accuracy_checker/accuracy_checker/metrics/detection.py
1 """
2 Copyright (c) 2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 """
16
17 import bisect
18 import enum
19 import warnings
20 from typing import List
21
22 import numpy as np
23
24 from ..utils import finalize_metric_result
25 from .overlap import Overlap, IOA
26 from ..config import BoolField, NumberField, StringField
27 from ..representation import DetectionAnnotation, DetectionPrediction
28 from .metric import BaseMetricConfig, FullDatasetEvaluationMetric
29
30
31 class APIntegralType(enum.Enum):
32     voc_11_point = '11point'
33     voc_max = 'max'
34
35
36 class BaseDetectionMetricConfig(BaseMetricConfig):
37     overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
38     ignore_difficult = BoolField(optional=True)
39     include_boundaries = BoolField(optional=True)
40     distinct_conf = BoolField(optional=True)
41     allow_multiple_matches_per_ignored = BoolField(optional=True)
42     overlap_method = StringField(optional=True, choices=Overlap.providers)
43     use_filtered_tp = BoolField(optional=True)
44
45
class BaseDetectionMetricMixin:
    def configure(self):
        self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
        self.ignore_difficult = self.config.get('ignore_difficult', True)
        self.include_boundaries = self.config.get('include_boundaries', True)
        self.distinct_conf = self.config.get('distinct_conf', False)
        self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
        self.overlap_method = Overlap.provide(self.config.get('overlap_method', 'iou'), self.include_boundaries)
        self.use_filtered_tp = self.config.get('use_filtered_tp', False)

        label_map = self.config.get('label_map', 'label_map')
        labels = self.dataset.metadata.get(label_map, {})
        self.labels = labels.keys()
        valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
        self.meta['names'] = [labels[name] for name in valid_labels]

    def per_class_detection_statistics(self, annotations, predictions, labels):
        labels_stat = {}
        for label in labels:
            tp, fp, conf, n = bbox_match(
                annotations, predictions, int(label),
                self.overlap_method, self.overlap_threshold,
                self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
                self.use_filtered_tp
            )

            if not tp.size:
                labels_stat[label] = {
                    'precision': np.array([]),
                    'recall': np.array([]),
                    'thresholds': conf,
                    'fppi': np.array([])
                }
                continue

            # select only values for distinct confidences
            if self.distinct_conf:
                distinct_value_indices = np.where(np.diff(conf))[0]
                threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
            else:
                threshold_indexes = np.arange(conf.size)

            tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]

            labels_stat[label] = {
                'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
                'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
                'thresholds': conf[threshold_indexes],
                'fppi': fp / len(annotations)
            }

        return labels_stat

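
# Illustrative sketch (not part of the original module): how the per-detection
# TP/FP flags produced by bbox_match are turned into the precision, recall and
# FPPI arrays built in per_class_detection_statistics above. The toy tp/fp
# vectors below are hypothetical and assume detections are already sorted by
# descending confidence.
def _precision_recall_sketch():
    tp = np.array([1, 0, 1, 0])   # per-detection true-positive flags
    fp = np.array([0, 1, 0, 1])   # per-detection false-positive flags
    n_gt = 3                      # number of ground truth boxes for the class
    n_images = 2                  # dataset size, used for FPPI

    tp_cum, fp_cum = np.cumsum(tp), np.cumsum(fp)
    precision = tp_cum / np.maximum(tp_cum + fp_cum, np.finfo(np.float64).eps)
    recall = tp_cum / np.maximum(n_gt, np.finfo(np.float64).eps)
    fppi = fp_cum / n_images
    # precision ≈ [1.0, 0.5, 0.67, 0.5], recall ≈ [0.33, 0.33, 0.67, 0.67]
    return precision, recall, fppi
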

class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating mAP metric of detection models.
    """

    __provider__ = 'map'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _MAPConfigValidator(BaseDetectionMetricConfig):
            integral = StringField(choices=[e.value for e in APIntegralType], optional=True)

        map_config_validator = _MAPConfigValidator(
            self.__provider__, on_extra_argument=_MAPConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        map_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        average_precisions = []
        for label in labels_stat:
            label_precision = labels_stat[label]['precision']
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                ap = average_precision(label_precision, label_recall, self.integral)
                average_precisions.append(ap)
            else:
                average_precisions.append(np.nan)

        average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
        if not average_precisions:
            warnings.warn("No detections to compute mAP")
            average_precisions.append(0)

        return average_precisions


class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating Miss Rate metric of detection models.
    """

    __provider__ = 'miss_rate'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _MRConfigValidator(BaseDetectionMetricConfig):
            fppi_level = NumberField(min_value=0, max_value=1)

        nms_config_validator = _MRConfigValidator(
            self.__provider__, on_extra_argument=_MRConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        nms_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.fppi_level = self.config.get('fppi_level')

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        miss_rates = []
        for label in labels_stat:
            label_miss_rate = 1.0 - labels_stat[label]['recall']
            label_fppi = labels_stat[label]['fppi']

            position = bisect.bisect_left(label_fppi, self.fppi_level)
            m0 = max(0, position - 1)
            m1 = position if position < len(label_miss_rate) else m0
            miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))

        return miss_rates

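
# Illustrative sketch (not part of the original module): the miss-rate lookup in
# MissRate.evaluate averages the two samples that bracket the requested FPPI
# level. The fppi and miss_rate arrays below are hypothetical.
def _miss_rate_at_fppi_sketch(fppi_level=0.1):
    fppi = np.array([0.0, 0.05, 0.2, 0.5])      # monotonically non-decreasing
    miss_rate = np.array([0.9, 0.6, 0.4, 0.2])  # 1 - recall at each threshold

    position = bisect.bisect_left(fppi, fppi_level)
    m0 = max(0, position - 1)
    m1 = position if position < len(miss_rate) else m0
    # for fppi_level == 0.1 this averages miss_rate[1] and miss_rate[2] -> 0.5
    return 0.5 * (miss_rate[m0] + miss_rate[m1])
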

class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating recall metric of detection models.
    """

    __provider__ = 'recall'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        recall_config_validator = BaseDetectionMetricConfig(
            self.__provider__, on_extra_argument=BaseDetectionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
        )
        recall_config_validator.validate(self.config)

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        recalls = []
        for label in labels_stat:
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                max_recall = label_recall[-1]
                recalls.append(max_recall)
            else:
                recalls.append(np.nan)

        recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
        if not recalls:
            warnings.warn("No detections to compute recall")
            recalls.append(0)

        return recalls


class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    __provider__ = 'detection_accuracy'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _DAConfigValidator(BaseDetectionMetricConfig):
            use_normalization = BoolField(optional=True)

        da_config_validator = _DAConfigValidator(
            self.__provider__, on_extra_argument=_DAConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        da_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.use_normalization = self.config.get('use_normalization', False)

    def evaluate(self, annotations, predictions):
        all_matches, _, _ = match_detections_class_agnostic(
            predictions, annotations, self.overlap_threshold, self.overlap_method
        )
        cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
        if self.use_normalization:
            return np.mean(normalize_confusion_matrix(cm).diagonal())

        return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))


def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
    out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
    for gt, prediction in zip(gt_data, predicted_data):
        for match_pair in all_matched_ids[gt.identifier]:
            gt_label = int(gt.labels[match_pair[0]])
            pred_label = int(prediction.labels[match_pair[1]])
            out_cm[gt_label, pred_label] += 1

    return out_cm


def normalize_confusion_matrix(cm):
    row_sums = np.maximum(1, np.sum(cm, axis=1, keepdims=True)).astype(np.float32)
    return cm.astype(np.float32) / row_sums

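
# Illustrative sketch (not part of the original module): once class-agnostic
# matches are folded into a confusion matrix, DetectionAccuracyMetric reports
# either the overall fraction of correctly classified matches or, with
# use_normalization, the mean of the row-normalized diagonal. The matrix below
# is hypothetical.
def _detection_accuracy_sketch():
    cm = np.array([[9, 1],
                   [2, 3]], dtype=np.int32)
    overall = float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))  # 12 / 15 = 0.8
    normalized = np.mean(normalize_confusion_matrix(cm).diagonal())            # (0.9 + 0.6) / 2 = 0.75
    return overall, normalized
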

def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
    all_matches = {}
    total_gt_bbox_num = 0
    matched_gt_bbox_num = 0

    for gt, prediction in zip(gt_data, predicted_data):
        gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
        predicted_bboxes = np.stack(
            (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
        )

        total_gt_bbox_num += len(gt_bboxes)

        similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)

        matches = []
        for _ in gt_bboxes:
            best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
            best_match_value = similarity_matrix[best_match_pos]

            if best_match_value <= min_iou:
                break

            gt_id = best_match_pos[0]
            predicted_id = best_match_pos[1]

            similarity_matrix[gt_id, :] = 0.0
            similarity_matrix[:, predicted_id] = 0.0

            matches.append((gt_id, predicted_id))
            matched_gt_bbox_num += 1

        all_matches[gt.identifier] = matches

    return all_matches, total_gt_bbox_num, matched_gt_bbox_num

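
# Illustrative sketch (not part of the original module): the greedy matching in
# match_detections_class_agnostic repeatedly takes the highest-overlap
# (gt, prediction) pair, records it, and zeroes out that row and column so
# neither box can be matched again. The similarity matrix below is hypothetical.
def _greedy_matching_sketch(min_iou=0.5):
    similarity = np.array([[0.9, 0.3],
                           [0.4, 0.6]], dtype=np.float32)
    matches = []
    for _ in range(similarity.shape[0]):
        gt_id, predicted_id = np.unravel_index(similarity.argmax(), similarity.shape)
        if similarity[gt_id, predicted_id] <= min_iou:
            break
        similarity[gt_id, :] = 0.0
        similarity[:, predicted_id] = 0.0
        matches.append((gt_id, predicted_id))
    # matches == [(0, 0), (1, 1)] for the matrix above
    return matches
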

def calculate_similarity_matrix(set_a, set_b, overlap):
    similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
    for i, box_a in enumerate(set_a):
        for j, box_b in enumerate(set_b):
            similarity[i, j] = overlap(box_a, box_b)

    return similarity


def average_precision(precision, recall, integral):
    if integral == APIntegralType.voc_11_point:
        result = 0.
        for point in np.arange(0., 1.1, 0.1):
            accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
            result = result + accumulator / 11.

        return result

    if integral != APIntegralType.voc_max:
        raise NotImplementedError("Integral type not implemented")

    # append sentinel values at both ends
    recall = np.concatenate(([0.], recall, [1.]))
    precision = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(precision.size - 1, 0, -1):
        precision[i - 1] = np.maximum(precision[i - 1], precision[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    change_point = np.where(recall[1:] != recall[:-1])[0]
    # and sum (\Delta recall) * precision
    return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])

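
# Illustrative sketch (not part of the original module): the two integration
# modes supported by average_precision on a hypothetical precision/recall
# curve. 'max' integration sums rectangle areas under the monotone precision
# envelope (roughly 0.56 here), while '11point' averages the best precision at
# recalls 0.0, 0.1, ..., 1.0 (roughly 0.55 here).
def _average_precision_sketch():
    precision = np.array([1.0, 0.5, 2.0 / 3.0, 0.5])
    recall = np.array([1.0 / 3.0, 1.0 / 3.0, 2.0 / 3.0, 2.0 / 3.0])
    ap_max = average_precision(precision, recall, APIntegralType.voc_max)
    ap_11_point = average_precision(precision, recall, APIntegralType.voc_11_point)
    return ap_max, ap_11_point
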

def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
               include_boundaries=True, use_filtered_tp=False):
344     """
345     Args:
346         annotation: ground truth bounding boxes.
347         prediction: predicted bounding boxes.
348         label: class for which bounding boxes are matched.
349         overlap_evaluator: evaluator of overlap.
350         overlap_thresh: bounding box IoU threshold.
351         ignore_difficult: ignores difficult bounding boxes (see Pascal VOC).
352         allow_multiple_matches_per_ignored: allows multiple matches per ignored.
353         include_boundaries: if is True then width and height of box is calculated by max - min + 1.
354         use_filtered_tp: if is True then ignored object are counted during evaluation.
355     Returns:
356         tp: tp[i] == 1 if detection with i-th highest score is true positive.
357         fp: fp[i] == 1 if detection with i-th highest score is false positive.
358         thresholds: array of confidence thresholds.
359         number_ground_truth = number of true positives.
360     """

    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
        annotation, ignore_difficult, label
    )
    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
        label, prediction, ignore_difficult
    )

    tp = np.zeros_like(prediction_images)
    fp = np.zeros_like(prediction_images)

    for image in range(prediction_images.shape[0]):
        gt_img = annotation[prediction_images[image]]
        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
        used = used_boxes[gt_img.identifier]

        idx = gt_img.labels == label
        if not np.array(idx).any():
            fp[image] = 1
            continue

        prediction_box = prediction_boxes[image][1:]
        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]

        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
        if ignore_difficult and allow_multiple_matches_per_ignored:
            ioa = IOA(include_boundaries)
            ignored = np.where(annotation_difficult == 1)[0]
            ignored_annotation_boxes = (
                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
            )
            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)

        max_overlap = -np.inf

        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
        if not_ignored_overlaps.size:
            max_overlap = np.max(not_ignored_overlaps)

        if max_overlap < overlap_thresh and ignored_overlaps.size:
            max_overlap = np.max(ignored_overlaps)
        max_overlapped = np.where(overlaps == max_overlap)[0]

        def set_false_positive(box_index):
            is_box_difficult = difficult_boxes_prediction[box_index].any()
            return int(not ignore_difficult or not is_box_difficult)

        if max_overlap < overlap_thresh:
            fp[image] = set_false_positive(image)
            continue

        if not annotation_difficult[max_overlapped].any():
            if not used[max_overlapped].any():
                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
                    tp[image] = 1
                    used[max_overlapped] = True
            else:
                fp[image] = set_false_positive(image)
        elif not allow_multiple_matches_per_ignored:
            if used[max_overlapped].any():
                fp[image] = set_false_positive(image)
            used[max_overlapped] = True

    return tp, fp, prediction_boxes[:, 0], number_ground_truth


def _prepare_annotation_boxes(annotation, ignore_difficult, label):
    used_boxes = {}
    difficult_boxes = {}
    num_ground_truth = 0

    for ground_truth in annotation:
        idx_for_label = ground_truth.labels == label
        filtered_label = ground_truth.labels[idx_for_label]
        used_ = np.zeros_like(filtered_label)
        used_boxes[ground_truth.identifier] = used_
        num_ground_truth += used_.shape[0]

        difficult_box_mask = np.full_like(ground_truth.labels, False)
        difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True
        difficult_box_mask = difficult_box_mask[idx_for_label]

        difficult_boxes[ground_truth.identifier] = difficult_box_mask
        if ignore_difficult:
            num_ground_truth -= np.sum(difficult_box_mask)

    return used_boxes, num_ground_truth, difficult_boxes


def _prepare_prediction_boxes(label, predictions, ignore_difficult):
    prediction_images = []
    prediction_boxes = []
    indexes = []
    difficult_boxes = []
    for i, prediction in enumerate(predictions):
        idx = prediction.labels == label

        prediction_images.append(np.full(prediction.labels[idx].shape, i))
        prediction_boxes.append(np.c_[
            prediction.scores[idx],
            prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
        ])

        difficult_box_mask = np.full_like(prediction.labels, False)
        difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True

        difficult_boxes.append(difficult_box_mask)
        indexes.append(np.argwhere(idx))

    prediction_boxes = np.concatenate(prediction_boxes)
    difficult_boxes = np.concatenate(difficult_boxes)
    sorted_order = np.argsort(-prediction_boxes[:, 0])
    prediction_boxes = prediction_boxes[sorted_order]
    prediction_images = np.concatenate(prediction_images)[sorted_order]
    difficult_boxes = difficult_boxes[sorted_order]

    return prediction_boxes, prediction_images, difficult_boxes


def get_valid_labels(labels, background):
    return list(filter(lambda label: label != background, labels))