2 Copyright (c) 2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
17 from functools import singledispatch
18 from typing import Union
20 from ..config import NumberField, BaseField
21 from ..representation import (
24 PoseEstimationPrediction,
25 PoseEstimationAnnotation
27 from ..utils import get_or_parse_value
28 from .overlap import Overlap
29 from .metric import BaseMetricConfig, PerImageEvaluationMetric
34 '.50:.05:.95': np.linspace(.5, 0.95, np.round((0.95 - .5) / .05).astype(int) + 1, endpoint=True)
class MSCOCOAveragePresicionMetricConfig(BaseMetricConfig):
    """Configuration schema shared by the MS COCO precision and recall metrics.

    Both options are declared optional, so either may be left out of the
    metric configuration.
    """

    # Optional numeric option; the metric reads it from its config under the
    # key 'max_detections' and falls back to 20 when it is absent.
    max_detections = NumberField(optional=True)

    # Optional option declared as a plain BaseField; the metric reads it under
    # the key 'threshold' (fallback '.50:.05:.95') and resolves the value
    # through get_or_parse_value against COCO_THRESHOLDS.
    threshold = BaseField(optional=True)
43 class MSCOCOBaseMetric(PerImageEvaluationMetric):
44 annotation_types = (PoseEstimationAnnotation, DetectionAnnotation)
45 prediction_types = (PoseEstimationPrediction, DetectionPrediction)
def validate_config(self):
    """Check ``self.config`` against the COCO metric configuration schema.

    The validator is created under the name 'coco_metric' with
    ``on_extra_argument`` set to the schema's ``ERROR_ON_EXTRA_ARGUMENT``
    policy (presumably this rejects unknown keys; confirm against
    BaseMetricConfig), and is then run over ``self.config``.
    """
    schema = MSCOCOAveragePresicionMetricConfig
    validator = schema('coco_metric', on_extra_argument=schema.ERROR_ON_EXTRA_ARGUMENT)
    validator.validate(self.config)
54 self.max_detections = self.config.get('max_detections', 20)
55 self.thresholds = get_or_parse_value(self.config.get('threshold', '.50:.05:.95'), COCO_THRESHOLDS)
56 label_map = self.dataset.metadata.get('label_map', [])
58 label for label in label_map
59 if label != self.dataset.metadata.get('background_label')
61 self.meta['names'] = [label_map[label] for label in self.labels]
62 self.matching_results = [[] for _ in self.labels]
64 def update(self, annotation, prediction):
65 compute_iou, create_boxes = select_specific_parameters(annotation)
67 for label_id, label in enumerate(self.labels):
68 detections, scores, dt_difficult = prepare_predictions(prediction, label, self.max_detections)
69 ground_truth, gt_difficult, iscrowd, boxes, areas = prepare_annotations(annotation, label, create_boxes)
70 iou = compute_iou(ground_truth, detections, boxes, areas)
71 self.matching_results[label_id].append(
83 def evaluate(self, annotations, predictions):
87 class MSCOCOAveragePresicion(MSCOCOBaseMetric):
88 __provider__ = 'coco_precision'
90 def evaluate(self, annotations, predictions):
92 compute_precision_recall(self.thresholds, self.matching_results[i])[0]
93 for i, _ in enumerate(self.labels)
99 class MSCOCORecall(MSCOCOBaseMetric):
100 __provider__ = 'coco_recall'
102 def evaluate(self, annotations, predictions):
104 compute_precision_recall(self.thresholds, self.matching_results[i])[1]
105 for i, _ in enumerate(self.labels)
def select_specific_parameters(annotation):
    """Pick the overlap function and box-creation flag for an annotation.

    This is the fallback variant: it ignores ``annotation`` and always selects
    box IoU. A variant for PoseEstimationAnnotation is registered separately
    through ``select_specific_parameters.register``.

    Returns:
        A ``(overlap_function, create_boxes)`` pair. The caller invokes the
        first element to compare ground truth with detections and forwards the
        second to ``prepare_annotations``.
    """
    overlap_function = compute_iou_boxes
    create_boxes = False
    return overlap_function, create_boxes
@select_specific_parameters.register(PoseEstimationAnnotation)
def pose_estimation_params(annotation):
    """Select matching parameters for pose-estimation annotations.

    Registered as the PoseEstimationAnnotation variant of
    ``select_specific_parameters``; ``annotation`` itself is not inspected.

    Returns:
        A ``(overlap_function, create_boxes)`` pair: ``compute_oks`` as the
        overlap function and ``True`` for the box-creation flag.
    """
    overlap_function, create_boxes = compute_oks, True
    return overlap_function, create_boxes
def prepare(entry, order):
    """Collect the selected box coordinates of ``entry`` into one array.

    Args:
        entry: object exposing ``x_mins``, ``y_mins``, ``x_maxs`` and
            ``y_maxs`` (assumed to be 1-D NumPy arrays of equal length;
            confirm against the representation classes).
        order: index or boolean mask applied to each coordinate array.

    Returns:
        Array whose columns are, in order, x_min, y_min, x_max, y_max of the
        selected boxes.
    """
    coordinate_names = ('x_mins', 'y_mins', 'x_maxs', 'y_maxs')
    # Indexing np.c_ with a tuple is the same call as np.c_[a, b, c, d].
    columns = tuple(getattr(entry, name)[order] for name in coordinate_names)
    return np.c_[columns]
122 @prepare.register(Union[PoseEstimationPrediction, PoseEstimationAnnotation])
123 def prepare_keypoints(entry, order):
127 if np.size(entry.x_values[order]) == 0:
130 return np.concatenate((entry.x_values[order], entry.y_values[order], entry.visibility[order]), axis=-1)
133 def prepare_predictions(prediction, label, max_detections):
134 if prediction.size == 0:
136 prediction_ids = prediction.labels == label
137 scores = prediction.scores[prediction_ids]
138 if np.size(scores) == 0:
140 scores_ids = np.argsort(- scores, kind='mergesort')
141 difficult_box_mask = np.full(prediction.size, False)
142 difficult_box_mask[prediction.metadata.get('difficult_boxes', [])] = True
143 difficult_for_label = difficult_box_mask[prediction_ids]
144 if len(scores_ids) > max_detections:
145 scores_ids = scores_ids[:max_detections]
146 detections = prepare(prediction, prediction_ids)
147 detections = detections[scores_ids]
149 return detections, scores[scores_ids], difficult_for_label[scores_ids]
152 def prepare_annotations(annotation, label, create_boxes=False):
153 annotation_ids = annotation.labels == label
154 difficult_box_mask = np.full(annotation.size, False)
155 difficult_box_indices = annotation.metadata.get("difficult_boxes", [])
156 iscrowd = np.array(annotation.metadata.get('iscrowd', [0]*annotation.size))
157 difficult_box_mask[difficult_box_indices] = True
158 difficult_box_mask[iscrowd > 0] = True
159 difficult_label = difficult_box_mask[annotation_ids]
160 not_difficult_box_indices = np.argwhere(~difficult_label).reshape(-1)
161 difficult_box_indices = np.argwhere(difficult_label).reshape(-1)
162 iscrowd_label = iscrowd[annotation_ids]
163 order = np.hstack((not_difficult_box_indices, difficult_box_indices)).astype(int)
167 boxes = np.array(annotation.bboxes)
168 boxes = boxes[annotation_ids]
169 areas = np.array(annotation.areas)
170 areas = areas[annotation_ids] if np.size(areas) > 0 else np.array([])
174 return prepare(annotation, annotation_ids)[order], difficult_label[order], iscrowd_label[order], boxes, areas
177 def compute_precision_recall(thresholds, matching_results):
178 num_thresholds = len(thresholds)
179 rectangle_thresholds = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
180 num_rec_thresholds = len(rectangle_thresholds)
181 precision = -np.ones((num_thresholds, num_rec_thresholds)) # -1 for the precision of absent categories
182 recall = -np.ones(num_thresholds)
183 dt_scores = np.concatenate([e['scores'] for e in matching_results])
184 inds = np.argsort(-dt_scores, kind='mergesort')
185 dtm = np.concatenate([e['dt_matches'] for e in matching_results], axis=1)[:, inds]
186 dt_ignored = np.concatenate([e['dt_ignore'] for e in matching_results], axis=1)[:, inds]
187 gt_ignored = np.concatenate([e['gt_ignore'] for e in matching_results])
188 npig = np.count_nonzero(gt_ignored == 0)
189 tps = np.logical_and(dtm, np.logical_not(dt_ignored))
190 fps = np.logical_and(np.logical_not(dtm), np.logical_not(dt_ignored))
191 tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
192 fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
193 for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
196 num_detections = len(tp)
198 pr = tp / (fp + tp + np.spacing(1))
199 q = np.zeros(num_rec_thresholds)
206 # numpy is slow without cython optimization for accessing elements
207 # use python array gets significant speed improvement
211 for i in range(num_detections - 1, 0, -1):
212 if pr[i] > pr[i - 1]:
215 inds = np.searchsorted(rc, rectangle_thresholds, side='left')
217 for ri, pi in enumerate(inds):
221 precision[t] = np.array(q)
223 mean_precision = 0 if np.size(precision[precision > -1]) == 0 else np.mean(precision[precision > -1])
224 mean_recall = 0 if np.size(recall[recall > -1]) == 0 else np.mean(recall[recall > -1])
226 return mean_precision, mean_recall
229 def compute_iou_boxes(annotation, prediction, *args, **kwargs):
230 if np.size(annotation) == 0 or np.size(prediction) == 0:
232 overlap = Overlap.provide('iou')
233 iou = np.zeros((prediction.size // 4, annotation.size // 4), dtype=np.float32)
234 for i, box_a in enumerate(annotation):
235 for j, box_b in enumerate(prediction):
236 iou[j, i] = overlap(box_a, box_b)
241 def compute_oks(annotation_points, prediction_points, annotation_boxes, annotation_areas):
242 if np.size(prediction_points) == 0 or np.size(annotation_points) == 0:
244 oks = np.zeros((len(prediction_points), len(annotation_points)))
245 sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89])/10.0
246 variance = (sigmas * 2)**2
247 # compute oks between each detection and ground truth object
248 for gt_idx, gt_points in enumerate(annotation_points):
249 # create bounds for ignore regions(double the gt bbox)
251 ygt = gt_points[17:34]
253 k1 = np.count_nonzero(vgt > 0)
254 x0_bbox, y0_bbox, x1_bbox, y1_bbox = annotation_boxes[gt_idx]
255 area_gt = annotation_areas[gt_idx]
256 w_bbox = x1_bbox - x0_bbox
257 h_bbox = y1_bbox - y0_bbox
258 x0 = x0_bbox - w_bbox
259 x1 = x0_bbox + w_bbox * 2
260 y0 = y0_bbox - h_bbox
261 y1 = y0_bbox + h_bbox * 2
262 for dt_idx, dt_points in enumerate(prediction_points):
264 ydt = dt_points[17:34]
266 # measure the per-keypoint distance if keypoints visible
270 # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
271 zeros = np.zeros(len(sigmas))
272 x_diff = np.max((zeros, x0 - xdt), axis=0) + np.max((zeros, xdt - x1), axis=0)
273 y_diff = np.max((zeros, y0 - ydt), axis=0) + np.max((zeros, ydt - y1), axis=0)
274 evaluation = (x_diff ** 2 + y_diff ** 2) / variance / (area_gt + np.spacing(1)) / 2
276 evaluation = evaluation[vgt > 0]
277 oks[dt_idx, gt_idx] = np.sum(np.exp(- evaluation)) / evaluation.shape[0]
282 def evaluate_image(ground_truth, gt_difficult, iscrowd, detections, dt_difficult, scores, iou, thresholds):
283 thresholds_num = len(thresholds)
284 gt_num = len(ground_truth)
285 dt_num = len(detections)
286 gt_matched = np.zeros((thresholds_num, gt_num))
287 dt_matched = np.zeros((thresholds_num, dt_num))
288 gt_ignored = gt_difficult
289 dt_ignored = np.zeros((thresholds_num, dt_num))
291 for tind, t in enumerate(thresholds):
292 for dtind, _ in enumerate(detections):
293 # information about best match so far (matched_id = -1 -> unmatched)
294 iou_current = min([t, 1-1e-10])
296 for gtind, _ in enumerate(ground_truth):
297 # if this gt already matched, and not a crowd, continue
298 if gt_matched[tind, gtind] > 0 and not iscrowd[gtind]:
300 # if dt matched to reg gt, and on ignore gt, stop
301 if matched_id > -1 and not gt_ignored[matched_id] and gt_ignored[gtind]:
303 # continue to next gt unless better match made
304 if iou[dtind, gtind] < iou_current:
306 # if match successful and best so far, store appropriately
307 iou_current = iou[dtind, gtind]
309 # if match made store id of match for both dt and gt
312 dt_ignored[tind, dtind] = gt_ignored[matched_id]
313 dt_matched[tind, dtind] = 1
314 gt_matched[tind, matched_id] = dtind
315 # store results for given image
317 'dt_matches': dt_matched,
318 'gt_matches': gt_matched,
319 'gt_ignore': gt_ignored,
320 'dt_ignore': np.logical_or(dt_ignored, dt_difficult),