2 Copyright (c) 2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
import bisect
import enum
import warnings
from typing import List

import numpy as np

from ..utils import finalize_metric_result
from .overlap import Overlap, IOA
from ..config import BoolField, NumberField, StringField
from ..representation import DetectionAnnotation, DetectionPrediction
from .metric import BaseMetricConfig, FullDatasetEvaluationMetric
class APIntegralType(enum.Enum):
    """Ways of integrating a precision/recall curve into average precision.

    The member values are the strings accepted by the ``integral`` option of
    the mAP metric configuration.
    """

    # Mean of the interpolated precision sampled at 11 recall points.
    voc_11_point = '11point'
    # Area under the monotonic precision envelope. This member is used as the
    # default by ``DetectionMAP`` and checked by ``average_precision``.
    # NOTE(review): the string value 'max' was restored by hand - confirm.
    voc_max = 'max'
class BaseDetectionMetricConfig(BaseMetricConfig):
    """Config schema with the options shared by every detection metric here.

    All fields are declared optional; the fallback values applied when an
    option is absent are chosen by ``BaseDetectionMetricMixin.configure``.
    """
    # Minimal overlap for a prediction to match a ground truth box, in [0, 1].
    overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
    # Whether boxes listed as "difficult" are excluded from the statistics.
    ignore_difficult = BoolField(optional=True)
    # Forwarded to the overlap evaluator (box size computed as max - min + 1).
    include_boundaries = BoolField(optional=True)
    # Keep only one precision/recall point per distinct confidence value.
    distinct_conf = BoolField(optional=True)
    # Allow several predictions to match the same ignored ground truth box.
    allow_multiple_matches_per_ignored = BoolField(optional=True)
    # Name of the overlap implementation; must be one of ``Overlap.providers``.
    overlap_method = StringField(optional=True, choices=Overlap.providers)
    # Count predictions flagged as difficult as true positives when they match.
    use_filtered_tp = BoolField(optional=True)
class MAPConfigValidator(BaseDetectionMetricConfig):
    """Config schema of the mAP metric: shared detection options plus ``integral``."""
    # Optional AP integration mode; accepted strings are the APIntegralType values.
    integral = StringField(choices=[e.value for e in APIntegralType], optional=True)
class MRConfigValidator(BaseDetectionMetricConfig):
    """Config schema of the miss rate metric: shared detection options plus ``fppi_level``."""
    # False-positives-per-image level at which miss rate is read, in [0, 1].
    # NOTE(review): declared without ``optional=True``, unlike the other
    # fields - presumably mandatory; confirm against NumberField defaults.
    fppi_level = NumberField(min_value=0, max_value=1)
class DAConfigValidator(BaseDetectionMetricConfig):
    """Config schema of the detection accuracy metric: shared options plus ``use_normalization``."""
    # When set, accuracy is the mean diagonal of the row-normalized confusion matrix.
    use_normalization = BoolField(optional=True)
class BaseDetectionMetricMixin:
    """
    Shared setup and per-class statistics for bounding box detection metrics.

    The host metric class is expected to provide ``self.config``,
    ``self.dataset`` and ``self.meta``.
    """

    def configure(self):
        """Read the matching options from the config and collect class names."""
        self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
        self.ignore_difficult = self.config.get('ignore_difficult', True)
        self.include_boundaries = self.config.get('include_boundaries', True)
        self.distinct_conf = self.config.get('distinct_conf', False)
        self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
        # The config schema declares this option as ``overlap_method``; the
        # historical ``overlap`` key stays as a fallback for existing configs.
        overlap_name = self.config.get('overlap_method', self.config.get('overlap', 'iou'))
        self.overlap_method = Overlap.provide(overlap_name, self.include_boundaries)
        self.use_filtered_tp = self.config.get('use_filtered_tp', False)

        label_map = self.config.get('label_map', 'label_map')
        labels = self.dataset.metadata.get(label_map, {})
        self.labels = labels.keys()
        # Reported names exclude the background class.
        valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
        self.meta['names'] = [labels[name] for name in valid_labels]

    def per_class_detection_statistics(self, annotations, predictions, labels):
        """
        Build precision/recall/fppi curves for every requested label.

        Args:
            annotations: ground truth for the whole dataset.
            predictions: predictions for the whole dataset, aligned with annotations.
            labels: labels to evaluate.
        Returns:
            dict mapping each label to a dict with the keys 'precision',
            'recall', 'thresholds' and 'fppi'. Labels without any detection
            get empty 'precision', 'recall' and 'fppi' arrays.
        """
        labels_stat = {}
        for label in labels:
            tp, fp, conf, n = bbox_match(
                annotations, predictions, int(label),
                self.overlap_method, self.overlap_threshold,
                self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
                self.use_filtered_tp
            )

            if not tp.size:
                # No detection of this class: nothing to accumulate.
                labels_stat[label] = {
                    'precision': np.array([]),
                    'recall': np.array([]),
                    'thresholds': conf,
                    'fppi': np.array([])
                }
                continue

            # select only values for distinct confidences
            if self.distinct_conf:
                distinct_value_indices = np.where(np.diff(conf))[0]
                threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
            else:
                threshold_indexes = np.arange(conf.size)

            tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]

            labels_stat[label] = {
                # eps in the denominators avoids division by zero
                'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
                'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
                'thresholds': conf[threshold_indexes],
                'fppi': fp / len(annotations)
            }

        return labels_stat
class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating mAP metric of detection models.
    """

    # NOTE(review): provider name restored by analogy with the sibling metrics
    # ('miss_rate', 'recall') - confirm the registered name.
    __provider__ = 'map'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    _config_validator_type = MAPConfigValidator

    def configure(self):
        """Read the shared detection options plus the AP integration mode."""
        super().configure()
        self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))

    def evaluate(self, annotations, predictions):
        """
        Compute average precision for every non-background class.

        Returns:
            list of per-class AP values with NaN entries filtered out by
            ``finalize_metric_result`` (``self.meta['names']`` is filtered
            accordingly); ``[0]`` with a warning when nothing is left.
        """
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        average_precisions = []
        for label in labels_stat:
            label_precision = labels_stat[label]['precision']
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                ap = average_precision(label_precision, label_recall, self.integral)
                average_precisions.append(ap)
            else:
                # class without detections: marked NaN so it is dropped below
                average_precisions.append(np.nan)

        average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
        if not average_precisions:
            warnings.warn("No detections to compute mAP")
            average_precisions.append(0)

        return average_precisions
class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating Miss Rate metric of detection models.
    """

    __provider__ = 'miss_rate'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    _config_validator_type = MRConfigValidator

    def configure(self):
        """Read the shared detection options plus the target fppi level."""
        super().configure()
        self.fppi_level = self.config.get('fppi_level')

    def evaluate(self, annotations, predictions):
        """
        Compute miss rate (1 - recall) at the configured fppi level per class.

        Returns:
            list with one value per non-background class; NaN for a class
            that has no detections at all.
        """
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        miss_rates = []
        for label in labels_stat:
            label_miss_rate = 1.0 - labels_stat[label]['recall']
            label_fppi = labels_stat[label]['fppi']

            if not label_miss_rate.size:
                # No detections for this class: the curve is empty and
                # indexing it below would raise IndexError.
                miss_rates.append(np.nan)
                continue

            # Average the two curve points surrounding the requested fppi
            # level, clamping at both ends of the curve.
            position = bisect.bisect_left(label_fppi, self.fppi_level)
            m0 = max(0, position - 1)
            m1 = position if position < len(label_miss_rate) else m0
            miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))

        return miss_rates
class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating recall metric of detection models.
    """

    __provider__ = 'recall'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )

    _config_validator_type = BaseDetectionMetricConfig

    def evaluate(self, annotations, predictions):
        """
        Compute the maximal reached recall for every non-background class.

        Returns:
            list of per-class recall values with NaN entries filtered out by
            ``finalize_metric_result`` (``self.meta['names']`` is filtered
            accordingly); ``[0]`` with a warning when nothing is left.
        """
        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)

        recalls = []
        for label in labels_stat:
            label_recall = labels_stat[label]['recall']
            if label_recall.size:
                # recall is cumulative, so the last point is the maximum
                max_recall = label_recall[-1]
                recalls.append(max_recall)
            else:
                recalls.append(np.nan)

        recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
        if not recalls:
            # message previously mentioned mAP (copied from DetectionMAP)
            warnings.warn("No detections to compute recall")
            recalls.append(0)

        return recalls
class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
    """
    Class for evaluating classification accuracy of class-agnostically matched detections.
    """

    __provider__ = 'detection_accuracy'

    annotation_types = (DetectionAnnotation, )
    prediction_types = (DetectionPrediction, )
    _config_validator_type = DAConfigValidator

    def configure(self):
        """Read the shared detection options plus the normalization switch."""
        super().configure()
        self.use_normalization = self.config.get('use_normalization', False)

    def evaluate(self, annotations, predictions):
        """
        Match boxes regardless of class, then score label agreement.

        Returns:
            mean of the diagonal of the row-normalized confusion matrix when
            ``use_normalization`` is set, otherwise the fraction of matched
            pairs whose labels agree (0 when nothing matched).
        """
        all_matches, _, _ = match_detections_class_agnostic(
            predictions, annotations, self.overlap_threshold, self.overlap_method
        )
        cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
        if self.use_normalization:
            return np.mean(normalize_confusion_matrix(cm).diagonal())

        # max(1, ...) guards against division by zero when nothing matched
        return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))
def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
    """
    Count label co-occurrences over matched (ground truth, prediction) box pairs.

    Args:
        all_matched_ids: dict mapping an annotation identifier to a list of
            (gt box index, predicted box index) pairs.
        predicted_data: predictions, aligned one-to-one with gt_data.
        gt_data: ground truth annotations.
        num_classes: side length of the resulting square matrix.
    Returns:
        int32 array of shape (num_classes, num_classes); rows are ground
        truth labels, columns are predicted labels.
    """
    out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
    for gt, prediction in zip(gt_data, predicted_data):
        for gt_index, pred_index in all_matched_ids[gt.identifier]:
            gt_label = int(gt.labels[gt_index])
            pred_label = int(prediction.labels[pred_index])
            out_cm[gt_label, pred_label] += 1

    return out_cm
def normalize_confusion_matrix(cm):
    """Divide every row of the confusion matrix by its sum, as float32.

    Rows that sum to zero are divided by 1 and therefore stay all-zero.
    """
    per_row_totals = np.sum(cm, axis=1, keepdims=True)
    safe_totals = np.maximum(1, per_row_totals).astype(np.float32)
    as_float = cm.astype(np.float32)
    return as_float / safe_totals
def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
    """
    Greedily match ground truth and predicted boxes, ignoring class labels.

    For every image the pair with the highest overlap is taken repeatedly,
    removing both boxes from further matching, until the best remaining
    overlap is not greater than min_iou.

    Args:
        predicted_data: predictions, aligned one-to-one with gt_data.
        gt_data: ground truth annotations.
        min_iou: overlap a pair has to exceed to be matched.
        overlap_method: callable scoring the overlap of two boxes.
    Returns:
        all_matches: dict mapping annotation identifier to a list of
            (gt box index, predicted box index) pairs.
        total_gt_bbox_num: number of ground truth boxes seen.
        matched_gt_bbox_num: number of ground truth boxes that got a match.
    """
    all_matches = {}
    total_gt_bbox_num = 0
    matched_gt_bbox_num = 0

    for gt, prediction in zip(gt_data, predicted_data):
        gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
        predicted_bboxes = np.stack(
            (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
        )

        total_gt_bbox_num += len(gt_bboxes)

        similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)

        matches = []
        # argmax raises ValueError on an empty matrix, which happens for an
        # image that has ground truth boxes but no predictions.
        if similarity_matrix.size:
            for _ in range(len(gt_bboxes)):
                best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
                best_match_value = similarity_matrix[best_match_pos]

                if best_match_value <= min_iou:
                    break

                gt_id = best_match_pos[0]
                predicted_id = best_match_pos[1]

                # each box may take part in at most one match
                similarity_matrix[gt_id, :] = 0.0
                similarity_matrix[:, predicted_id] = 0.0

                matches.append((gt_id, predicted_id))
                matched_gt_bbox_num += 1

        all_matches[gt.identifier] = matches

    return all_matches, total_gt_bbox_num, matched_gt_bbox_num
def calculate_similarity_matrix(set_a, set_b, overlap):
    """
    Score every pair of items from two collections.

    Args:
        set_a: sequence of items indexing the rows.
        set_b: sequence of items indexing the columns.
        overlap: callable taking (item_a, item_b) and returning a number.
    Returns:
        float32 array of shape (len(set_a), len(set_b)).
    """
    similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
    for i, box_a in enumerate(set_a):
        for j, box_b in enumerate(set_b):
            similarity[i, j] = overlap(box_a, box_b)

    return similarity
def average_precision(precision, recall, integral):
    """
    Integrate a precision/recall curve into a single average precision value.

    Args:
        precision: array of precision values.
        recall: array of recall values, aligned with precision.
        integral: APIntegralType member selecting the integration mode.
    Returns:
        average precision as a float.
    Raises:
        NotImplementedError: if integral is not a supported mode.
    """
    if integral == APIntegralType.voc_11_point:
        result = 0.
        for point in np.arange(0., 1.1, 0.1):
            # best precision among the points reaching this recall level
            accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
            result = result + accumulator / 11.

        return result

    if integral != APIntegralType.voc_max:
        raise NotImplementedError("Integral type not implemented")

    # first append sentinel values at the end
    recall = np.concatenate(([0.], recall, [1.]))
    precision = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(precision.size - 1, 0, -1):
        precision[i - 1] = np.maximum(precision[i - 1], precision[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    change_point = np.where(recall[1:] != recall[:-1])[0]
    # and sum (\Delta recall) * precision
    return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])
def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
               include_boundaries=True, use_filtered_tp=False):
    """
    Match predicted boxes of one class against ground truth, highest score first.

    Args:
        annotation: ground truth bounding boxes.
        prediction: predicted bounding boxes.
        label: class for which bounding boxes are matched.
        overlap_evaluator: evaluator of overlap.
        overlap_thresh: bounding box IoU threshold.
        ignore_difficult: ignores difficult bounding boxes (see Pascal VOC).
        allow_multiple_matches_per_ignored: allows multiple matches per ignored.
        include_boundaries: if is True then width and height of box is calculated by max - min + 1.
        use_filtered_tp: if is True then a matched prediction counts as true positive
            even when the prediction itself is flagged as difficult.
    Returns:
        tp: tp[i] == 1 if detection with i-th highest score is true positive.
        fp: fp[i] == 1 if detection with i-th highest score is false positive.
        thresholds: array of confidence thresholds.
        number_ground_truth: number of ground truth boxes of the class
            (difficult boxes excluded when ignore_difficult is set).
    """

    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
        annotation, ignore_difficult, label
    )
    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
        label, prediction, ignore_difficult
    )

    tp = np.zeros_like(prediction_images)
    fp = np.zeros_like(prediction_images)

    def set_false_positive(box_index):
        # a difficult prediction is not penalised while difficult boxes are ignored
        is_box_difficult = difficult_boxes_prediction[box_index].any()
        return int(not ignore_difficult or not is_box_difficult)

    # Loop-invariant: ignored ground truth boxes are re-scored with IOA only
    # when several predictions may match the same ignored box.
    rescore_ignored = ignore_difficult and allow_multiple_matches_per_ignored
    ioa = IOA(include_boundaries) if rescore_ignored else None

    for image in range(prediction_images.shape[0]):
        gt_img = annotation[prediction_images[image]]
        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
        used = used_boxes[gt_img.identifier]

        idx = gt_img.labels == label
        if not np.array(idx).any():
            # no ground truth of this class in the image
            # NOTE(review): this statement was restored by hand - confirm it
            # is an unconditional false positive.
            fp[image] = 1
            continue

        prediction_box = prediction_boxes[image][1:]
        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]

        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
        if rescore_ignored:
            ignored = np.where(annotation_difficult == 1)[0]
            ignored_annotation_boxes = (
                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
            )
            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)

        max_overlap = -np.inf

        # regular boxes take priority; ignored ones are considered only when
        # no regular box overlaps enough
        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
        if not_ignored_overlaps.size:
            max_overlap = np.max(not_ignored_overlaps)

        if max_overlap < overlap_thresh and ignored_overlaps.size:
            max_overlap = np.max(ignored_overlaps)
        max_overlapped = np.where(overlaps == max_overlap)[0]

        if max_overlap < overlap_thresh:
            fp[image] = set_false_positive(image)
            continue

        if not annotation_difficult[max_overlapped].any():
            if not used[max_overlapped].any():
                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
                    tp[image] = 1
                    used[max_overlapped] = True
            else:
                # ground truth box already claimed by a higher scored prediction
                fp[image] = set_false_positive(image)
        elif not allow_multiple_matches_per_ignored:
            if used[max_overlapped].any():
                fp[image] = set_false_positive(image)
            used[max_overlapped] = True

    return tp, fp, prediction_boxes[:, 0], number_ground_truth
def _prepare_annotation_boxes(annotation, ignore_difficult, label):
    """
    Collect per-image bookkeeping for ground truth boxes of one class.

    Args:
        annotation: ground truth annotations.
        ignore_difficult: whether boxes listed under the "difficult_boxes"
            metadata key are flagged and left out of the ground truth count.
        label: class to select.
    Returns:
        used_boxes: dict identifier -> zero array with one "already matched"
            flag per box of the class.
        num_ground_truth: number of boxes of the class, minus the difficult
            ones when ignore_difficult is set.
        difficult_boxes: dict identifier -> difficult flag per box of the class.
    """
    used_boxes = {}
    difficult_boxes = {}
    num_ground_truth = 0

    for ground_truth in annotation:
        idx_for_label = ground_truth.labels == label
        filtered_label = ground_truth.labels[idx_for_label]
        used_ = np.zeros_like(filtered_label)
        used_boxes[ground_truth.identifier] = used_
        num_ground_truth += used_.shape[0]

        # the mask is built over all boxes of the image, then narrowed to the class
        difficult_box_mask = np.full_like(ground_truth.labels, False)
        difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True
        difficult_box_mask = difficult_box_mask[idx_for_label]

        difficult_boxes[ground_truth.identifier] = difficult_box_mask
        if ignore_difficult:
            num_ground_truth -= np.sum(difficult_box_mask)

    return used_boxes, num_ground_truth, difficult_boxes
def _prepare_prediction_boxes(label, predictions, ignore_difficult):
    """
    Gather all predicted boxes of one class, sorted by descending score.

    Args:
        label: class to select.
        predictions: predictions for the whole dataset.
        ignore_difficult: whether boxes listed under the "difficult_boxes"
            metadata key are flagged as difficult.
    Returns:
        prediction_boxes: array with rows (score, x_min, y_min, x_max, y_max).
        prediction_images: index into predictions for every row.
        difficult_boxes: difficult flag for every row.
    """
    if not predictions:
        # np.concatenate rejects an empty list; return well-shaped empties
        return np.empty((0, 5)), np.empty(0, dtype=int), np.empty(0, dtype=bool)

    prediction_images = []
    prediction_boxes = []
    difficult_boxes = []
    for i, prediction in enumerate(predictions):
        idx = prediction.labels == label

        prediction_images.append(np.full(prediction.labels[idx].shape, i))
        prediction_boxes.append(np.c_[
            prediction.scores[idx],
            prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
        ])

        difficult_box_mask = np.full_like(prediction.labels, False)
        difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
        if ignore_difficult:
            difficult_box_mask[difficult_box_indices] = True

        # Narrow the mask to the selected class so it stays aligned with the
        # rows of prediction_boxes; the unfiltered mask covers every box of
        # the image and no longer lines up once boxes are sorted.
        difficult_boxes.append(difficult_box_mask[idx])

    prediction_boxes = np.concatenate(prediction_boxes)
    difficult_boxes = np.concatenate(difficult_boxes)
    sorted_order = np.argsort(-prediction_boxes[:, 0])
    prediction_boxes = prediction_boxes[sorted_order]
    prediction_images = np.concatenate(prediction_images)[sorted_order]
    difficult_boxes = difficult_boxes[sorted_order]

    return prediction_boxes, prediction_images, difficult_boxes
def get_valid_labels(labels, background):
    """Return the given labels as a list, leaving out the background label."""
    return [candidate for candidate in labels if candidate != background]