Publishing 2019 R1 content
[platform/upstream/dldt.git] / inference-engine / src / extension / ext_detectionoutput_onnx.cpp
1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 //
4
5 #include "ext_list.hpp"
6 #include "ext_base.hpp"
7
8 #include <cassert>
9 #include <cfloat>
10 #include <vector>
11 #include <cmath>
12 #include <string>
13 #include <utility>
14 #include <algorithm>
15 #include "ie_parallel.hpp"
16
17
18 namespace {
19 struct Indexer {
20   const std::vector<int> dims_;
21   int total_{1};
22
23   explicit Indexer(const std::vector<int>& dims) : dims_(dims) {
24       total_ = 1;
25       for (size_t i = 0; i < dims_.size(); ++i) {
26           total_ *= dims_[i];
27       }
28   }
29
30   const int operator()(const std::vector<int>& idx) const {
31       int flat_idx = 0;
32       assert(idx.size() == dims_.size());
33       for (size_t i = 0; i < dims_.size(); ++i) {
34           assert(0 <= idx[i] && idx[i] < dims_[i]);
35           flat_idx = flat_idx * dims_[i] + idx[i];
36       }
37       assert(flat_idx < total_);
38       return flat_idx;
39   }
40 };
41 }  // namespace
42
43
44 namespace InferenceEngine {
45 namespace Extensions {
46 namespace Cpu {
47
48 static
49 void refine_boxes(const float* boxes, const float* deltas, const float* weights, const float* scores,
50                   float* refined_boxes, float* refined_boxes_areas, float* refined_scores,
51                   const int rois_num, const int classes_num,
52                   const float img_H, const float img_W,
53                   const float max_delta_log_wh,
54                   float coordinates_offset) {
55     Indexer box_idx({rois_num, 4});
56     Indexer delta_idx({rois_num, classes_num, 4});
57     Indexer score_idx({rois_num, classes_num});
58
59     Indexer refined_box_idx({classes_num, rois_num, 4});
60     Indexer refined_score_idx({classes_num, rois_num});
61
62     for (int roi_idx = 0; roi_idx < rois_num; ++roi_idx) {
63         float x0 = boxes[box_idx({roi_idx, 0})];
64         float y0 = boxes[box_idx({roi_idx, 1})];
65         float x1 = boxes[box_idx({roi_idx, 2})];
66         float y1 = boxes[box_idx({roi_idx, 3})];
67
68         if (x1 - x0 <= 0 || y1 - y0 <= 0) {
69             continue;
70         }
71
72         // width & height of box
73         const float ww = x1 - x0 + coordinates_offset;
74         const float hh = y1 - y0 + coordinates_offset;
75         // center location of box
76         const float ctr_x = x0 + 0.5f * ww;
77         const float ctr_y = y0 + 0.5f * hh;
78
79         for (int class_idx = 1; class_idx < classes_num; ++class_idx) {
80             const float dx = deltas[delta_idx({roi_idx, class_idx, 0})] / weights[0];
81             const float dy = deltas[delta_idx({roi_idx, class_idx, 1})] / weights[1];
82             const float d_log_w = deltas[delta_idx({roi_idx, class_idx, 2})] / weights[2];
83             const float d_log_h = deltas[delta_idx({roi_idx, class_idx, 3})] / weights[3];
84
85             // new center location according to deltas (dx, dy)
86             const float pred_ctr_x = dx * ww + ctr_x;
87             const float pred_ctr_y = dy * hh + ctr_y;
88             // new width & height according to deltas d(log w), d(log h)
89             const float pred_w = std::exp(std::min(d_log_w, max_delta_log_wh)) * ww;
90             const float pred_h = std::exp(std::min(d_log_h, max_delta_log_wh)) * hh;
91
92             // update upper-left corner location
93             float x0_new = pred_ctr_x - 0.5f * pred_w;
94             float y0_new = pred_ctr_y - 0.5f * pred_h;
95             // update lower-right corner location
96             float x1_new = pred_ctr_x + 0.5f * pred_w - coordinates_offset;
97             float y1_new = pred_ctr_y + 0.5f * pred_h - coordinates_offset;
98
99             // adjust new corner locations to be within the image region,
100             x0_new = std::max<float>(0.0f, std::min<float>(x0_new, img_W - coordinates_offset));
101             y0_new = std::max<float>(0.0f, std::min<float>(y0_new, img_H - coordinates_offset));
102             x1_new = std::max<float>(0.0f, std::min<float>(x1_new, img_W - coordinates_offset));
103             y1_new = std::max<float>(0.0f, std::min<float>(y1_new, img_H - coordinates_offset));
104
105             // recompute new width & height
106             const float box_w = x1_new - x0_new + coordinates_offset;
107             const float box_h = y1_new - y0_new + coordinates_offset;
108
109             refined_boxes[refined_box_idx({class_idx, roi_idx, 0})] = x0_new;
110             refined_boxes[refined_box_idx({class_idx, roi_idx, 1})] = y0_new;
111             refined_boxes[refined_box_idx({class_idx, roi_idx, 2})] = x1_new;
112             refined_boxes[refined_box_idx({class_idx, roi_idx, 3})] = y1_new;
113
114             refined_boxes_areas[refined_score_idx({class_idx, roi_idx})] = box_w * box_h;
115
116             refined_scores[refined_score_idx({class_idx, roi_idx})] = scores[score_idx({roi_idx, class_idx})];
117         }
118     }
119 }
120
121 template <typename T>
122 static bool SortScorePairDescend(const std::pair<float, T>& pair1,
123                                  const std::pair<float, T>& pair2) {
124     return pair1.first > pair2.first;
125 }
126
127
128 struct ConfidenceComparator {
129     explicit ConfidenceComparator(const float* conf_data) : _conf_data(conf_data) {}
130
131     bool operator()(int idx1, int idx2) {
132         if (_conf_data[idx1] > _conf_data[idx2]) return true;
133         if (_conf_data[idx1] < _conf_data[idx2]) return false;
134         return idx1 < idx2;
135     }
136
137     const float* _conf_data;
138 };
139
140 static inline float JaccardOverlap(const float *decoded_bbox,
141                                    const float *bbox_sizes,
142                                    const int idx1,
143                                    const int idx2,
144                                    const float coordinates_offset = 1) {
145     float xmin1 = decoded_bbox[idx1 * 4 + 0];
146     float ymin1 = decoded_bbox[idx1 * 4 + 1];
147     float xmax1 = decoded_bbox[idx1 * 4 + 2];
148     float ymax1 = decoded_bbox[idx1 * 4 + 3];
149
150     float xmin2 = decoded_bbox[idx2 * 4 + 0];
151     float ymin2 = decoded_bbox[idx2 * 4 + 1];
152     float ymax2 = decoded_bbox[idx2 * 4 + 3];
153     float xmax2 = decoded_bbox[idx2 * 4 + 2];
154
155     if (xmin2 > xmax1 || xmax2 < xmin1 || ymin2 > ymax1 || ymax2 < ymin1) {
156         return 0.0f;
157     }
158
159     float intersect_xmin = std::max(xmin1, xmin2);
160     float intersect_ymin = std::max(ymin1, ymin2);
161     float intersect_xmax = std::min(xmax1, xmax2);
162     float intersect_ymax = std::min(ymax1, ymax2);
163
164     float intersect_width  = intersect_xmax - intersect_xmin + coordinates_offset;
165     float intersect_height = intersect_ymax - intersect_ymin + coordinates_offset;
166
167     if (intersect_width <= 0 || intersect_height <= 0) {
168         return 0.0f;
169     }
170
171     float intersect_size = intersect_width * intersect_height;
172     float bbox1_size = bbox_sizes[idx1];
173     float bbox2_size = bbox_sizes[idx2];
174
175     return intersect_size / (bbox1_size + bbox2_size - intersect_size);
176 }
177
178
179 static void nms_cf(const float* conf_data,
180                           const float* bboxes,
181                           const float* sizes,
182                           int* buffer,
183                           int* indices,
184                           int& detections,
185                           const int boxes_num,
186                           const int pre_nms_topn,
187                           const int post_nms_topn,
188                           const float confidence_threshold,
189                           const float nms_threshold) {
190     int count = 0;
191     for (int i = 0; i < boxes_num; ++i) {
192         if (conf_data[i] > confidence_threshold) {
193             indices[count] = i;
194             count++;
195         }
196     }
197
198     int num_output_scores = (pre_nms_topn == -1 ? count : std::min<int>(pre_nms_topn, count));
199
200     std::partial_sort_copy(indices, indices + count,
201                            buffer, buffer + num_output_scores,
202                            ConfidenceComparator(conf_data));
203
204     detections = 0;
205     for (int i = 0; i < num_output_scores; ++i) {
206         const int idx = buffer[i];
207
208         bool keep = true;
209         for (int k = 0; k < detections; ++k) {
210             const int kept_idx = indices[k];
211             float overlap = JaccardOverlap(bboxes, sizes, idx, kept_idx);
212             if (overlap > nms_threshold) {
213                 keep = false;
214                 break;
215             }
216         }
217         if (keep) {
218             indices[detections] = idx;
219             detections++;
220         }
221     }
222
223     detections = (post_nms_topn == -1 ? detections : std::min<int>(post_nms_topn, detections));
224 }
225
226
227 class ExperimentalDetectronDetectionOutputImpl: public ExtLayerBase {
228 private:
229     const int INPUT_ROIS {0};
230     const int INPUT_DELTAS {1};
231     const int INPUT_SCORES {2};
232     const int INPUT_IM_INFO {3};
233
234     const int OUTPUT_BOXES {0};
235     const int OUTPUT_CLASSES {1};
236     const int OUTPUT_SCORES {2};
237
238 public:
239     explicit ExperimentalDetectronDetectionOutputImpl(const CNNLayer* layer) {
240         try {
241             score_threshold_ = layer->GetParamAsFloat("score_threshold");
242             nms_threshold_ = layer->GetParamAsFloat("nms_threshold");
243             max_delta_log_wh_ = layer->GetParamAsFloat("max_delta_log_wh");
244             classes_num_ = layer->GetParamAsInt("num_classes");
245             max_detections_per_class_ = layer->GetParamAsInt("post_nms_count");
246             max_detections_per_image_ = layer->GetParamAsInt("max_detections_per_image");
247             class_agnostic_box_regression_ = layer->GetParamAsBool("class_agnostic_box_regression", false);
248             deltas_weights_ = layer->GetParamAsFloats("deltas_weights");
249
250             std::vector<DataConfigurator> inputs_layouts(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
251             std::vector<DataConfigurator> outputs_layouts(layer->outData.size(), DataConfigurator(ConfLayout::PLN));
252             addConfig(layer, inputs_layouts, outputs_layouts);
253         } catch (InferenceEngine::details::InferenceEngineException &ex) {
254             errorMsg = ex.what();
255         }
256     }
257
258     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
259                        ResponseDesc *resp) noexcept override {
260         const int rois_num = inputs[INPUT_ROIS]->getTensorDesc().getDims()[0];
261         assert(classes_num_ == static_cast<int>(inputs[INPUT_SCORES]->getTensorDesc().getDims()[1]));
262         assert(4 * classes_num_ == static_cast<int>(inputs[INPUT_DELTAS]->getTensorDesc().getDims()[1]));
263
264         const auto* boxes = inputs[INPUT_ROIS]->buffer().as<const float *>();
265         const auto* deltas = inputs[INPUT_DELTAS]->buffer().as<const float *>();
266         const auto* scores = inputs[INPUT_SCORES]->buffer().as<const float *>();
267         const auto* im_info = inputs[INPUT_IM_INFO]->buffer().as<const float *>();
268
269         auto* output_boxes = outputs[OUTPUT_BOXES]->buffer().as<float *>();
270         auto* output_scores = outputs[OUTPUT_SCORES]->buffer().as<float *>();
271         auto* output_classes = outputs[OUTPUT_CLASSES]->buffer().as<float *>();
272
273         const float img_H = im_info[0];
274         const float img_W = im_info[1];
275
276         // Apply deltas.
277         std::vector<float> refined_boxes(classes_num_ * rois_num * 4, 0);
278         std::vector<float> refined_scores(classes_num_ * rois_num, 0);
279         std::vector<float> refined_boxes_areas(classes_num_ * rois_num, 0);
280         Indexer refined_box_idx({classes_num_, rois_num, 4});
281         Indexer refined_score_idx({classes_num_, rois_num});
282
283         refine_boxes(boxes, deltas, &deltas_weights_[0], scores,
284                      &refined_boxes[0], &refined_boxes_areas[0], &refined_scores[0],
285                      rois_num, classes_num_,
286                      img_H, img_W,
287                      max_delta_log_wh_,
288                      1.0f);
289
290         // Apply NMS class-wise.
291         std::vector<int> buffer(rois_num, 0);
292         std::vector<int> indices(classes_num_ * rois_num, 0);
293         std::vector<int> detections_per_class(classes_num_, 0);
294         int total_detections_num = 0;
295
296         for (int class_idx = 1; class_idx < classes_num_; ++class_idx) {
297             nms_cf(&refined_scores[refined_score_idx({class_idx, 0})],
298                    &refined_boxes[refined_box_idx({class_idx, 0, 0})],
299                    &refined_boxes_areas[refined_score_idx({class_idx, 0})],
300                    &buffer[0],
301                    &indices[total_detections_num],
302                    detections_per_class[class_idx],
303                    rois_num,
304                    -1,
305                    max_detections_per_class_,
306                    score_threshold_,
307                    nms_threshold_);
308             total_detections_num += detections_per_class[class_idx];
309         }
310
311         // Leave only max_detections_per_image_ detections.
312         // confidence, <class, index>
313         std::vector<std::pair<float, std::pair<int, int>>> conf_index_class_map;
314
315         int indices_offset = 0;
316         for (int c = 0; c < classes_num_; ++c) {
317             int n = detections_per_class[c];
318             for (int i = 0; i < n; ++i) {
319                 int idx = indices[indices_offset + i];
320                 float score = refined_scores[refined_score_idx({c, idx})];
321                 conf_index_class_map.push_back(std::make_pair(score, std::make_pair(c, idx)));
322             }
323             indices_offset += n;
324         }
325
326         assert(max_detections_per_image_ > 0);
327         if (total_detections_num > max_detections_per_image_) {
328             std::partial_sort(conf_index_class_map.begin(),
329                               conf_index_class_map.begin() + max_detections_per_image_,
330                               conf_index_class_map.end(),
331                               SortScorePairDescend<std::pair<int, int>>);
332             conf_index_class_map.resize(max_detections_per_image_);
333             total_detections_num = max_detections_per_image_;
334         }
335
336         // Fill outputs.
337         memset(output_boxes, 0, max_detections_per_image_ * 4 * sizeof(float));
338         memset(output_scores, 0, max_detections_per_image_ * sizeof(float));
339         memset(output_classes, 0, max_detections_per_image_ * sizeof(float));
340
341         int i = 0;
342         for (const auto & detection : conf_index_class_map) {
343             float score = detection.first;
344             int cls = detection.second.first;
345             int idx = detection.second.second;
346             output_boxes[4 * i + 0] = refined_boxes[refined_box_idx({cls, idx, 0})];
347             output_boxes[4 * i + 1] = refined_boxes[refined_box_idx({cls, idx, 1})];
348             output_boxes[4 * i + 2] = refined_boxes[refined_box_idx({cls, idx, 2})];
349             output_boxes[4 * i + 3] = refined_boxes[refined_box_idx({cls, idx, 3})];
350             output_scores[i] = score;
351             output_classes[i] = static_cast<float>(cls);
352             ++i;
353         }
354
355         return OK;
356     }
357
358 private:
359     float score_threshold_;
360     float nms_threshold_;
361     float max_delta_log_wh_;
362     int classes_num_;
363     int max_detections_per_class_;
364     int max_detections_per_image_;
365     bool class_agnostic_box_regression_;
366     std::vector<float> deltas_weights_;
367 };
368
369
370
371 REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
372
373 }  // namespace Cpu
374 }  // namespace Extensions
375 }  // namespace InferenceEngine