1 """
2 Copyright (c) 2019 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8       http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 """

from functools import singledispatch
import numpy as np
from ..config import NumberField, BaseField
from ..representation import (
    DetectionPrediction,
    DetectionAnnotation,
    PoseEstimationPrediction,
    PoseEstimationAnnotation
)
from ..utils import get_or_parse_value
from .overlap import Overlap
from .metric import BaseMetricConfig, PerImageEvaluationMetric

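# Named matching-threshold presets following the COCO evaluation protocol
# (IoU for boxes, OKS for keypoints): a single threshold of 0.5 or 0.75, or the
# standard sweep of ten thresholds from 0.5 to 0.95 in steps of 0.05.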
COCO_THRESHOLDS = {
    '.50': [0.5],
    '.75': [0.75],
    '.50:.05:.95': np.linspace(.5, 0.95, np.round((0.95 - .5) / .05).astype(int) + 1, endpoint=True)
}


class MSCOCOAveragePrecisionMetricConfig(BaseMetricConfig):
    max_detections = NumberField(optional=True)
    threshold = BaseField(optional=True)


class MSCOCOBaseMetric(PerImageEvaluationMetric):
    annotation_types = (PoseEstimationAnnotation, DetectionAnnotation)
    prediction_types = (PoseEstimationPrediction, DetectionPrediction)

    def validate_config(self):
        coco_config_validator = MSCOCOAveragePrecisionMetricConfig(
            'coco_metric', on_extra_argument=MSCOCOAveragePrecisionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
        )
        coco_config_validator.validate(self.config)

    def configure(self):
        self.max_detections = self.config.get('max_detections', 20)
        self.thresholds = get_or_parse_value(self.config.get('threshold', '.50:.05:.95'), COCO_THRESHOLDS)
        label_map = self.dataset.metadata.get('label_map', [])
        self.labels = [
            label for label in label_map
            if label != self.dataset.metadata.get('background_label')
        ]
        self.meta['names'] = [label_map[label] for label in self.labels]
        self.matching_results = [[] for _ in self.labels]

    def update(self, annotation, prediction):
        compute_iou, create_boxes = select_specific_parameters(annotation)

        for label_id, label in enumerate(self.labels):
            detections, scores, dt_difficult = prepare_predictions(prediction, label, self.max_detections)
            ground_truth, gt_difficult, iscrowd, boxes, areas = prepare_annotations(annotation, label, create_boxes)
            iou = compute_iou(ground_truth, detections, boxes, areas)
            self.matching_results[label_id].append(
                evaluate_image(
                    ground_truth,
                    gt_difficult,
                    iscrowd,
                    detections,
                    dt_difficult,
                    scores,
                    iou,
                    self.thresholds
                )
            )

    def evaluate(self, annotations, predictions):
        pass


class MSCOCOAveragePrecision(MSCOCOBaseMetric):
    __provider__ = 'coco_precision'

    def evaluate(self, annotations, predictions):
        precision = [
            compute_precision_recall(self.thresholds, self.matching_results[i])[0]
            for i, _ in enumerate(self.labels)
        ]

        return precision


class MSCOCORecall(MSCOCOBaseMetric):
    __provider__ = 'coco_recall'

    def evaluate(self, annotations, predictions):
        recalls = [
            compute_precision_recall(self.thresholds, self.matching_results[i])[1]
            for i, _ in enumerate(self.labels)
        ]

        return recalls
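

# Illustrative configuration sketch (an assumption about the surrounding
# accuracy_checker YAML layout, not something defined in this file): the two
# providers above would typically be enabled per dataset as
#
#   metrics:
#     - type: coco_precision
#       max_detections: 100
#       threshold: '.50:.05:.95'
#     - type: coco_recall
#       threshold: '.50'
#
# 'max_detections' defaults to 20 and 'threshold' accepts one of the
# COCO_THRESHOLDS presets or an explicit value handled by get_or_parse_value.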


# Detection boxes are matched with plain IoU; pose estimation representations
# dispatch to OKS, which additionally needs the ground-truth boxes and areas
# (the second returned value tells prepare_annotations to build them).
@singledispatch
def select_specific_parameters(annotation):
    return compute_iou_boxes, False


@select_specific_parameters.register(PoseEstimationAnnotation)
def pose_estimation_params(annotation):
    return compute_oks, True


@singledispatch
def prepare(entry, order):
    return np.c_[entry.x_mins[order], entry.y_mins[order], entry.x_maxs[order], entry.y_maxs[order]]


# functools.singledispatch cannot dispatch on a typing.Union, so the keypoint
# representations are registered individually; both carry the same
# [17 x values, 17 y values, 17 visibility flags] per-instance layout.
@prepare.register(PoseEstimationAnnotation)
@prepare.register(PoseEstimationPrediction)
def prepare_keypoints(entry, order):
    if entry.size == 0:
        return []

    if np.size(entry.x_values[order]) == 0:
        return []

    return np.concatenate((entry.x_values[order], entry.y_values[order], entry.visibility[order]), axis=-1)


def prepare_predictions(prediction, label, max_detections):
    if prediction.size == 0:
        return [], [], []
    prediction_ids = prediction.labels == label
    scores = prediction.scores[prediction_ids]
    if np.size(scores) == 0:
        return [], [], []
    scores_ids = np.argsort(-scores, kind='mergesort')
    difficult_box_mask = np.full(prediction.size, False)
    difficult_box_mask[prediction.metadata.get('difficult_boxes', [])] = True
    difficult_for_label = difficult_box_mask[prediction_ids]
    if len(scores_ids) > max_detections:
        scores_ids = scores_ids[:max_detections]
    detections = prepare(prediction, prediction_ids)
    detections = detections[scores_ids]

    return detections, scores[scores_ids], difficult_for_label[scores_ids]


def prepare_annotations(annotation, label, create_boxes=False):
    annotation_ids = annotation.labels == label
    difficult_box_mask = np.full(annotation.size, False)
    difficult_box_indices = annotation.metadata.get('difficult_boxes', [])
    iscrowd = np.array(annotation.metadata.get('iscrowd', [0] * annotation.size))
    difficult_box_mask[difficult_box_indices] = True
    difficult_box_mask[iscrowd > 0] = True
    difficult_label = difficult_box_mask[annotation_ids]
    not_difficult_box_indices = np.argwhere(~difficult_label).reshape(-1)
    difficult_box_indices = np.argwhere(difficult_label).reshape(-1)
    iscrowd_label = iscrowd[annotation_ids]
    order = np.hstack((not_difficult_box_indices, difficult_box_indices)).astype(int)
    boxes = None
    areas = None
    if create_boxes:
        boxes = np.array(annotation.bboxes)
        boxes = boxes[annotation_ids]
        areas = np.array(annotation.areas)
        areas = areas[annotation_ids] if np.size(areas) > 0 else np.array([])
        boxes = boxes[order]
        areas = areas[order]

    return prepare(annotation, annotation_ids)[order], difficult_label[order], iscrowd_label[order], boxes, areas


def compute_precision_recall(thresholds, matching_results):
    num_thresholds = len(thresholds)
    # 101 recall levels from 0.0 to 1.0 in steps of 0.01, as in COCO's 101-point interpolation
    recall_thresholds = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
    num_recall_thresholds = len(recall_thresholds)
    precision = -np.ones((num_thresholds, num_recall_thresholds))  # -1 for the precision of absent categories
    recall = -np.ones(num_thresholds)
    dt_scores = np.concatenate([e['scores'] for e in matching_results])
    inds = np.argsort(-dt_scores, kind='mergesort')
    dtm = np.concatenate([e['dt_matches'] for e in matching_results], axis=1)[:, inds]
    dt_ignored = np.concatenate([e['dt_ignore'] for e in matching_results], axis=1)[:, inds]
    gt_ignored = np.concatenate([e['gt_ignore'] for e in matching_results])
    npig = np.count_nonzero(gt_ignored == 0)
    tps = np.logical_and(dtm, np.logical_not(dt_ignored))
    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dt_ignored))
    tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
    fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
        tp = np.array(tp)
        fp = np.array(fp)
        num_detections = len(tp)
        rc = tp / npig
        pr = tp / (fp + tp + np.spacing(1))
        q = np.zeros(num_recall_thresholds)

        if num_detections:
            recall[t] = rc[-1]
        else:
            recall[t] = 0

        # element access on numpy arrays is slow without Cython optimization;
        # converting to plain Python lists gives a significant speed-up here
        pr = pr.tolist()
        q = q.tolist()

        # make the precision envelope monotonically non-increasing by propagating maxima from the right
        for i in range(num_detections - 1, 0, -1):
            if pr[i] > pr[i - 1]:
                pr[i - 1] = pr[i]

        inds = np.searchsorted(rc, recall_thresholds, side='left')
        try:
            for ri, pi in enumerate(inds):
                q[ri] = pr[pi]
        except IndexError:
            pass
        precision[t] = np.array(q)

    mean_precision = 0 if np.size(precision[precision > -1]) == 0 else np.mean(precision[precision > -1])
    mean_recall = 0 if np.size(recall[recall > -1]) == 0 else np.mean(recall[recall > -1])

    return mean_precision, mean_recall


def compute_iou_boxes(annotation, prediction, *args, **kwargs):
    if np.size(annotation) == 0 or np.size(prediction) == 0:
        return []
    overlap = Overlap.provide('iou')
    iou = np.zeros((prediction.size // 4, annotation.size // 4), dtype=np.float32)
    for i, box_a in enumerate(annotation):
        for j, box_b in enumerate(prediction):
            iou[j, i] = overlap(box_a, box_b)

    return iou


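# Object Keypoint Similarity (OKS), as in the COCO keypoint evaluation protocol:
# per-keypoint squared distances are normalised by the ground-truth object area
# and a per-keypoint constant, then averaged as exp(-d_i**2 / (2 * s**2 * k_i**2))
# over the annotated keypoints; the sigmas below are the standard values for the
# 17 COCO keypoints.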
def compute_oks(annotation_points, prediction_points, annotation_boxes, annotation_areas):
    if np.size(prediction_points) == 0 or np.size(annotation_points) == 0:
        return []
    oks = np.zeros((len(prediction_points), len(annotation_points)))
    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
    variance = (sigmas * 2) ** 2
    # compute oks between each detection and ground truth object
    for gt_idx, gt_points in enumerate(annotation_points):
        # create bounds for ignore regions (double the gt bbox)
        xgt = gt_points[:17]
        ygt = gt_points[17:34]
        vgt = gt_points[34:]
        k1 = np.count_nonzero(vgt > 0)
        x0_bbox, y0_bbox, x1_bbox, y1_bbox = annotation_boxes[gt_idx]
        area_gt = annotation_areas[gt_idx]
        w_bbox = x1_bbox - x0_bbox
        h_bbox = y1_bbox - y0_bbox
        x0 = x0_bbox - w_bbox
        x1 = x0_bbox + w_bbox * 2
        y0 = y0_bbox - h_bbox
        y1 = y0_bbox + h_bbox * 2
        for dt_idx, dt_points in enumerate(prediction_points):
            xdt = dt_points[:17]
            ydt = dt_points[17:34]
            if k1 > 0:
                # measure the per-keypoint distance if keypoints are visible
                x_diff = xdt - xgt
                y_diff = ydt - ygt
            else:
                # measure minimum distance to keypoints in (x0, y0) & (x1, y1)
                zeros = np.zeros(len(sigmas))
                x_diff = np.max((zeros, x0 - xdt), axis=0) + np.max((zeros, xdt - x1), axis=0)
                y_diff = np.max((zeros, y0 - ydt), axis=0) + np.max((zeros, ydt - y1), axis=0)
            evaluation = (x_diff ** 2 + y_diff ** 2) / variance / (area_gt + np.spacing(1)) / 2
            if k1 > 0:
                evaluation = evaluation[vgt > 0]
            oks[dt_idx, gt_idx] = np.sum(np.exp(-evaluation)) / evaluation.shape[0]

    return oks


def evaluate_image(ground_truth, gt_difficult, iscrowd, detections, dt_difficult, scores, iou, thresholds):
    thresholds_num = len(thresholds)
    gt_num = len(ground_truth)
    dt_num = len(detections)
    gt_matched = np.zeros((thresholds_num, gt_num))
    dt_matched = np.zeros((thresholds_num, dt_num))
    gt_ignored = gt_difficult
    dt_ignored = np.zeros((thresholds_num, dt_num))
    if np.size(iou):
        for tind, t in enumerate(thresholds):
            for dtind, _ in enumerate(detections):
                # information about the best match so far (matched_id = -1 -> unmatched)
                iou_current = min([t, 1 - 1e-10])
                matched_id = -1
                for gtind, _ in enumerate(ground_truth):
                    # if this gt is already matched and is not a crowd, continue
                    if gt_matched[tind, gtind] > 0 and not iscrowd[gtind]:
                        continue
                    # if dt is already matched to a regular gt and we have reached the
                    # ignored gts (which are ordered last), stop the search
                    if matched_id > -1 and not gt_ignored[matched_id] and gt_ignored[gtind]:
                        break
                    # continue to the next gt unless a better match is made
                    if iou[dtind, gtind] < iou_current:
                        continue
                    # match successful and best so far, store it
                    iou_current = iou[dtind, gtind]
                    matched_id = gtind
                # if a match was made, store the match for both dt and gt
                if matched_id == -1:
                    continue
                dt_ignored[tind, dtind] = gt_ignored[matched_id]
                dt_matched[tind, dtind] = 1
                # store a 1-based detection index so the "> 0" matched check above
                # also holds when the matching detection has index 0
                gt_matched[tind, matched_id] = dtind + 1
    # store results for the given image
    return {
        'dt_matches': dt_matched,
        'gt_matches': gt_matched,
        'gt_ignore': gt_ignored,
        'dt_ignore': np.logical_or(dt_ignored, dt_difficult),
        'scores': scores
    }
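
# Sketch of how the pieces above fit together (a restatement for orientation,
# not additional behaviour):
#   1. MSCOCOBaseMetric.update() runs once per image: for every label it prepares
#      detections and annotations, computes IoU (or OKS for pose estimation) and
#      appends the per-image matching dict produced by evaluate_image() - keys
#      'dt_matches', 'gt_matches', 'dt_ignore', 'gt_ignore', 'scores' - to
#      self.matching_results[label].
#   2. evaluate() then reduces the accumulated results per label with
#      compute_precision_recall(), taking index 0 (precision) for coco_precision
#      and index 1 (recall) for coco_recall.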