"""
Copyright (c) 2019 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import bisect
import enum
import warnings
from typing import List

import numpy as np

from ..utils import finalize_metric_result
from .overlap import Overlap, IOA
from ..config import BoolField, NumberField, StringField
from ..representation import DetectionAnnotation, DetectionPrediction
from .metric import BaseMetricConfig, FullDatasetEvaluationMetric


class APIntegralType(enum.Enum):
    voc_11_point = '11point'
    voc_max = 'max'


class BaseDetectionMetricConfig(BaseMetricConfig):
    overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
    ignore_difficult = BoolField(optional=True)
    include_boundaries = BoolField(optional=True)
    distinct_conf = BoolField(optional=True)
    allow_multiple_matches_per_ignored = BoolField(optional=True)
    overlap_method = StringField(optional=True, choices=Overlap.providers)
    use_filtered_tp = BoolField(optional=True)
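
# A minimal sketch of a metric entry in an accuracy-checker configuration that
# exercises these fields (the YAML layout and the 'map' provider name are
# assumptions; field names mirror the validator above and the defaults in
# configure() below):
#
#   metrics:
#     - type: map
#       overlap_threshold: 0.5
#       ignore_difficult: True
#       overlap_method: iou
#       integral: 11point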


class BaseDetectionMetricMixin:
    def configure(self):
        self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
        self.ignore_difficult = self.config.get('ignore_difficult', True)
        self.include_boundaries = self.config.get('include_boundaries', True)
        self.distinct_conf = self.config.get('distinct_conf', False)
        self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
        self.overlap_method = Overlap.provide(self.config.get('overlap_method', 'iou'), self.include_boundaries)
        self.use_filtered_tp = self.config.get('use_filtered_tp', False)

        label_map = self.config.get('label_map', 'label_map')
        labels = self.dataset.metadata.get(label_map, {})
        self.labels = labels.keys()
        valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
        self.meta['names'] = [labels[name] for name in valid_labels]
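
    # Worked example for configure (illustrative values): with dataset metadata
    # label_map = {0: 'background', 1: 'person', 2: 'car'} and
    # background_label = 0, valid_labels is [1, 2] and self.meta['names']
    # becomes ['person', 'car'].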

    def per_class_detection_statistics(self, annotations, predictions, labels):
        labels_stat = {}
        for label in labels:
            tp, fp, conf, n = bbox_match(
                annotations, predictions, int(label),
                self.overlap_method, self.overlap_threshold,
                self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
                self.use_filtered_tp
            )

            if not tp.size:
                labels_stat[label] = {
                    'precision': np.array([]),
                    'recall': np.array([]),
                    'thresholds': conf,
                    'fppi': np.array([])
                }
                continue

            # select only values for distinct confidences
            if self.distinct_conf:
                distinct_value_indices = np.where(np.diff(conf))[0]
                threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
            else:
                threshold_indexes = np.arange(conf.size)

            tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]

            labels_stat[label] = {
                'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
                'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
                'thresholds': conf[threshold_indexes],
                'fppi': fp / len(annotations)
            }

        return labels_stat
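
# Worked example for per_class_detection_statistics (illustrative values, not
# executed): with detections sorted by descending confidence, tp = [1, 0, 1],
# fp = [0, 1, 0] and n = 2 ground-truth boxes, the cumulative sums are
# tp_cum = [1, 1, 2] and fp_cum = [0, 1, 1], so precision = [1/1, 1/2, 2/3]
# and recall = [1/2, 1/2, 2/2].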


class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating mAP metric of detection models.
    """

    __provider__ = 'map'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _MAPConfigValidator(BaseDetectionMetricConfig):
            integral = StringField(choices=[e.value for e in APIntegralType], optional=True)

        map_config_validator = _MAPConfigValidator(
            self.__provider__, on_extra_argument=_MAPConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        map_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        average_precisions = []
        for label in labels_stat:
            label_precision = labels_stat[label]['precision']
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                ap = average_precision(label_precision, label_recall, self.integral)
                average_precisions.append(ap)
            else:
                average_precisions.append(np.nan)

        average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
        if not average_precisions:
            warnings.warn("No detections to compute mAP")
            average_precisions.append(0)

        return average_precisions


class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating Miss Rate metric of detection models.
    """

    __provider__ = 'miss_rate'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _MRConfigValidator(BaseDetectionMetricConfig):
            fppi_level = NumberField(min_value=0, max_value=1)

        mr_config_validator = _MRConfigValidator(
            self.__provider__, on_extra_argument=_MRConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        mr_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.fppi_level = self.config.get('fppi_level')

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        miss_rates = []
        for label in labels_stat:
            label_miss_rate = 1.0 - labels_stat[label]['recall']
            label_fppi = labels_stat[label]['fppi']

            position = bisect.bisect_left(label_fppi, self.fppi_level)
            m0 = max(0, position - 1)
            m1 = position if position < len(label_miss_rate) else m0
            miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))

        return miss_rates
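
# Worked example for MissRate.evaluate (illustrative values, not executed):
# with fppi = [0.1, 0.5, 1.0], miss_rate = [0.8, 0.4, 0.2] and
# fppi_level = 0.5, bisect_left returns position 1, so m0 = 0 and m1 = 1, and
# the reported value averages the two samples around the requested FPPI level:
# 0.5 * (0.8 + 0.4) = 0.6.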


class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating recall metric of detection models.
    """

    __provider__ = 'recall'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        recall_config_validator = BaseDetectionMetricConfig(
            self.__provider__, on_extra_argument=BaseDetectionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
        )
        recall_config_validator.validate(self.config)

    def evaluate(self, annotations, predictions):
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        recalls = []
        for label in labels_stat:
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                max_recall = label_recall[-1]
                recalls.append(max_recall)
            else:
                recalls.append(np.nan)

        recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
        if not recalls:
            warnings.warn("No detections to compute recall")
            recalls.append(0)

        return recalls


class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    __provider__ = 'detection_accuracy'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    def validate_config(self):
        class _DAConfigValidator(BaseDetectionMetricConfig):
            use_normalization = BoolField(optional=True)

        da_config_validator = _DAConfigValidator(
            self.__provider__, on_extra_argument=_DAConfigValidator.ERROR_ON_EXTRA_ARGUMENT
        )
        da_config_validator.validate(self.config)

    def configure(self):
        super().configure()
        self.use_normalization = self.config.get('use_normalization', False)

    def evaluate(self, annotations, predictions):
        all_matches, _, _ = match_detections_class_agnostic(
            predictions, annotations, self.overlap_threshold, self.overlap_method
        )
        cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
        if self.use_normalization:
            return np.mean(normalize_confusion_matrix(cm).diagonal())

        return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))


def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
    out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
    for gt, prediction in zip(gt_data, predicted_data):
        for match_pair in all_matched_ids[gt.identifier]:
            gt_label = int(gt.labels[match_pair[0]])
            pred_label = int(prediction.labels[match_pair[1]])
            out_cm[gt_label, pred_label] += 1

    return out_cm


def normalize_confusion_matrix(cm):
    row_sums = np.maximum(1, np.sum(cm, axis=1, keepdims=True)).astype(np.float32)
    return cm.astype(np.float32) / row_sums
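
# Worked example for the two accuracy flavours (illustrative values): for
# cm = [[3, 1], [1, 1]] the raw accuracy is (3 + 1) / 6 ~= 0.667, while the
# normalized variant first divides each row by its sum ([4, 2]) and then
# averages the diagonal: (3/4 + 1/2) / 2 = 0.625.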


def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
    all_matches = {}
    total_gt_bbox_num = 0
    matched_gt_bbox_num = 0

    for gt, prediction in zip(gt_data, predicted_data):
        gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
        predicted_bboxes = np.stack(
            (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
        )

        total_gt_bbox_num += len(gt_bboxes)

        similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)

        matches = []
        while len(matches) < len(gt_bboxes):
            best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
            best_match_value = similarity_matrix[best_match_pos]

            if best_match_value <= min_iou:
                break

            gt_id = best_match_pos[0]
            predicted_id = best_match_pos[1]

            # greedily consume the matched row and column so each box is used once
            similarity_matrix[gt_id, :] = 0.0
            similarity_matrix[:, predicted_id] = 0.0

            matches.append((gt_id, predicted_id))
            matched_gt_bbox_num += 1

        all_matches[gt.identifier] = matches

    return all_matches, total_gt_bbox_num, matched_gt_bbox_num
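
# Worked example of the greedy matching above (illustrative values): for
# similarity_matrix = [[0.9, 0.3], [0.4, 0.8]] and min_iou = 0.5, the global
# maximum 0.9 pairs gt 0 with prediction 0; zeroing row 0 and column 0 leaves
# 0.8 as the next maximum, pairing gt 1 with prediction 1, after which every
# ground-truth box is matched and the loop ends.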


def calculate_similarity_matrix(set_a, set_b, overlap):
    similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
    for i, box_a in enumerate(set_a):
        for j, box_b in enumerate(set_b):
            similarity[i, j] = overlap(box_a, box_b)

    return similarity


def average_precision(precision, recall, integral):
    if integral == APIntegralType.voc_11_point:
        result = 0.
        for point in np.arange(0., 1.1, 0.1):
            accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
            result = result + accumulator / 11.

        return result

    if integral != APIntegralType.voc_max:
        raise NotImplementedError("Integral type not implemented")

    # first append sentinel values at the end
    recall = np.concatenate(([0.], recall, [1.]))
    precision = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(precision.size - 1, 0, -1):
        precision[i - 1] = np.maximum(precision[i - 1], precision[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    change_point = np.where(recall[1:] != recall[:-1])[0]
    # and sum (\Delta recall) * precision
    return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])
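
# Worked example for the voc_max branch (illustrative values): for
# precision = [1.0, 0.5, 0.67] and recall = [0.5, 0.5, 1.0], the sentinel
# arrays are recall = [0, 0.5, 0.5, 1, 1] and precision = [0, 1, 0.5, 0.67, 0];
# the envelope turns precision into [1, 1, 0.67, 0.67, 0], recall changes at
# indices 0 and 2, and AP = 0.5 * 1 + 0.5 * 0.67 = 0.835.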


def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
               include_boundaries=True, use_filtered_tp=False):
    """
    Args:
        annotation: ground truth bounding boxes.
        prediction: predicted bounding boxes.
        label: class for which bounding boxes are matched.
        overlap_evaluator: evaluator of overlap.
        overlap_thresh: bounding box IoU threshold.
        ignore_difficult: ignore bounding boxes marked as difficult (see Pascal VOC).
        allow_multiple_matches_per_ignored: allow multiple matches per ignored ground truth box.
        include_boundaries: if True, box width and height are computed as max - min + 1.
        use_filtered_tp: if True, matches to ignored objects are still counted as true positives.
    Returns:
        tp: tp[i] == 1 if the detection with the i-th highest score is a true positive.
        fp: fp[i] == 1 if the detection with the i-th highest score is a false positive.
        thresholds: array of confidence thresholds.
        number_ground_truth: number of ground truth boxes taken into account
            (difficult boxes are excluded when ignore_difficult is set).
    """
    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
        annotation, ignore_difficult, label
    )
    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
        label, prediction, ignore_difficult
    )

    tp = np.zeros_like(prediction_images)
    fp = np.zeros_like(prediction_images)

    for image in range(prediction_images.shape[0]):
        gt_img = annotation[prediction_images[image]]
        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
        used = used_boxes[gt_img.identifier]

        idx = gt_img.labels == label
        if not np.array(idx).any():
            # no ground truth boxes of this class in the image
            fp[image] = 1
            continue

        prediction_box = prediction_boxes[image][1:]
        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]

        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
        if ignore_difficult and allow_multiple_matches_per_ignored:
            ioa = IOA(include_boundaries)
            ignored = np.where(annotation_difficult == 1)[0]
            ignored_annotation_boxes = (
                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
            )
            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)

        max_overlap = -np.inf

        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
        if not_ignored_overlaps.size:
            max_overlap = np.max(not_ignored_overlaps)

        if max_overlap < overlap_thresh and ignored_overlaps.size:
            max_overlap = np.max(ignored_overlaps)
        max_overlapped = np.where(overlaps == max_overlap)[0]

        def set_false_positive(box_index):
            is_box_difficult = difficult_boxes_prediction[box_index].any()
            return int(not ignore_difficult or not is_box_difficult)

        if max_overlap < overlap_thresh:
            fp[image] = set_false_positive(image)
            continue

        if not annotation_difficult[max_overlapped].any():
            if not used[max_overlapped].any():
                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
                    tp[image] = 1
                    used[max_overlapped] = True
            else:
                fp[image] = set_false_positive(image)
        elif not allow_multiple_matches_per_ignored:
            if used[max_overlapped].any():
                fp[image] = set_false_positive(image)
            used[max_overlapped] = True

    return tp, fp, prediction_boxes[:, 0], number_ground_truth
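
# Minimal usage sketch (hypothetical gt/pred objects; real DetectionAnnotation
# and DetectionPrediction instances normally come from the dataset and the
# launcher):
#
#   overlap_evaluator = Overlap.provide('iou', True)  # include_boundaries=True
#   tp, fp, scores, n_gt = bbox_match([gt], [pred], 1, overlap_evaluator)
#
# tp and fp are aligned with detections sorted by descending score, so
# np.cumsum(tp) / max(n_gt, 1) traces the recall curve used by the metrics
# above.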


def _prepare_annotation_boxes(annotation, ignore_difficult, label):
    used_boxes = {}
    num_ground_truth = 0
    difficult_boxes = {}

    for ground_truth in annotation:
        idx_for_label = ground_truth.labels == label
        filtered_label = ground_truth.labels[idx_for_label]
        used_ = np.zeros_like(filtered_label)
        used_boxes[ground_truth.identifier] = used_
        num_ground_truth += used_.shape[0]

        difficult_box_mask = np.full_like(ground_truth.labels, False)
        difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True
        difficult_box_mask = difficult_box_mask[idx_for_label]

        difficult_boxes[ground_truth.identifier] = difficult_box_mask
        if ignore_difficult:
            num_ground_truth -= np.sum(difficult_box_mask)

    return used_boxes, num_ground_truth, difficult_boxes


def _prepare_prediction_boxes(label, predictions, ignore_difficult):
    prediction_images = []
    prediction_boxes = []
    indexes = []
    difficult_boxes = []

    for i, prediction in enumerate(predictions):
        idx = prediction.labels == label

        prediction_images.append(np.full(prediction.labels[idx].shape, i))
        prediction_boxes.append(np.c_[
            prediction.scores[idx],
            prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
        ])

        difficult_box_mask = np.full_like(prediction.labels, False)
        difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True
        # keep the mask aligned with the label-filtered boxes
        difficult_box_mask = difficult_box_mask[idx]

        difficult_boxes.append(difficult_box_mask)
        indexes.append(np.argwhere(idx))

    prediction_boxes = np.concatenate(prediction_boxes)
    difficult_boxes = np.concatenate(difficult_boxes)
    sorted_order = np.argsort(-prediction_boxes[:, 0])
    prediction_boxes = prediction_boxes[sorted_order]
    prediction_images = np.concatenate(prediction_images)[sorted_order]
    difficult_boxes = difficult_boxes[sorted_order]

    return prediction_boxes, prediction_images, difficult_boxes
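
# Shape note (illustrative): each row of the returned prediction_boxes is
# [score, x_min, y_min, x_max, y_max], sorted by descending score;
# prediction_images[i] is the index of the image that produced row i, and
# difficult_boxes[i] flags rows whose box is listed under "difficult_boxes"
# in the prediction metadata.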


def get_valid_labels(labels, background):
    return list(filter(lambda label: label != background, labels))