1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 #include "ext_list.hpp"
6 #include "ext_base.hpp"
15 #include "ie_parallel.hpp"
20 const std::vector<int> dims_;
23 explicit Indexer(const std::vector<int>& dims) : dims_(dims) {
25 for (size_t i = 0; i < dims_.size(); ++i) {
30 const int operator()(const std::vector<int>& idx) const {
32 assert(idx.size() == dims_.size());
33 for (size_t i = 0; i < dims_.size(); ++i) {
34 assert(0 <= idx[i] && idx[i] < dims_[i]);
35 flat_idx = flat_idx * dims_[i] + idx[i];
37 assert(flat_idx < total_);
44 namespace InferenceEngine {
45 namespace Extensions {
49 void refine_boxes(const float* boxes, const float* deltas, const float* weights, const float* scores,
50 float* refined_boxes, float* refined_boxes_areas, float* refined_scores,
51 const int rois_num, const int classes_num,
52 const float img_H, const float img_W,
53 const float max_delta_log_wh,
54 float coordinates_offset) {
55 Indexer box_idx({rois_num, 4});
56 Indexer delta_idx({rois_num, classes_num, 4});
57 Indexer score_idx({rois_num, classes_num});
59 Indexer refined_box_idx({classes_num, rois_num, 4});
60 Indexer refined_score_idx({classes_num, rois_num});
62 for (int roi_idx = 0; roi_idx < rois_num; ++roi_idx) {
63 float x0 = boxes[box_idx({roi_idx, 0})];
64 float y0 = boxes[box_idx({roi_idx, 1})];
65 float x1 = boxes[box_idx({roi_idx, 2})];
66 float y1 = boxes[box_idx({roi_idx, 3})];
68 if (x1 - x0 <= 0 || y1 - y0 <= 0) {
72 // width & height of box
73 const float ww = x1 - x0 + coordinates_offset;
74 const float hh = y1 - y0 + coordinates_offset;
75 // center location of box
76 const float ctr_x = x0 + 0.5f * ww;
77 const float ctr_y = y0 + 0.5f * hh;
79 for (int class_idx = 1; class_idx < classes_num; ++class_idx) {
80 const float dx = deltas[delta_idx({roi_idx, class_idx, 0})] / weights[0];
81 const float dy = deltas[delta_idx({roi_idx, class_idx, 1})] / weights[1];
82 const float d_log_w = deltas[delta_idx({roi_idx, class_idx, 2})] / weights[2];
83 const float d_log_h = deltas[delta_idx({roi_idx, class_idx, 3})] / weights[3];
85 // new center location according to deltas (dx, dy)
86 const float pred_ctr_x = dx * ww + ctr_x;
87 const float pred_ctr_y = dy * hh + ctr_y;
88 // new width & height according to deltas d(log w), d(log h)
89 const float pred_w = std::exp(std::min(d_log_w, max_delta_log_wh)) * ww;
90 const float pred_h = std::exp(std::min(d_log_h, max_delta_log_wh)) * hh;
92 // update upper-left corner location
93 float x0_new = pred_ctr_x - 0.5f * pred_w;
94 float y0_new = pred_ctr_y - 0.5f * pred_h;
95 // update lower-right corner location
96 float x1_new = pred_ctr_x + 0.5f * pred_w - coordinates_offset;
97 float y1_new = pred_ctr_y + 0.5f * pred_h - coordinates_offset;
99 // adjust new corner locations to be within the image region,
100 x0_new = std::max<float>(0.0f, std::min<float>(x0_new, img_W - coordinates_offset));
101 y0_new = std::max<float>(0.0f, std::min<float>(y0_new, img_H - coordinates_offset));
102 x1_new = std::max<float>(0.0f, std::min<float>(x1_new, img_W - coordinates_offset));
103 y1_new = std::max<float>(0.0f, std::min<float>(y1_new, img_H - coordinates_offset));
105 // recompute new width & height
106 const float box_w = x1_new - x0_new + coordinates_offset;
107 const float box_h = y1_new - y0_new + coordinates_offset;
109 refined_boxes[refined_box_idx({class_idx, roi_idx, 0})] = x0_new;
110 refined_boxes[refined_box_idx({class_idx, roi_idx, 1})] = y0_new;
111 refined_boxes[refined_box_idx({class_idx, roi_idx, 2})] = x1_new;
112 refined_boxes[refined_box_idx({class_idx, roi_idx, 3})] = y1_new;
114 refined_boxes_areas[refined_score_idx({class_idx, roi_idx})] = box_w * box_h;
116 refined_scores[refined_score_idx({class_idx, roi_idx})] = scores[score_idx({roi_idx, class_idx})];
121 template <typename T>
122 static bool SortScorePairDescend(const std::pair<float, T>& pair1,
123 const std::pair<float, T>& pair2) {
124 return pair1.first > pair2.first;
128 struct ConfidenceComparator {
129 explicit ConfidenceComparator(const float* conf_data) : _conf_data(conf_data) {}
131 bool operator()(int idx1, int idx2) {
132 if (_conf_data[idx1] > _conf_data[idx2]) return true;
133 if (_conf_data[idx1] < _conf_data[idx2]) return false;
137 const float* _conf_data;
140 static inline float JaccardOverlap(const float *decoded_bbox,
141 const float *bbox_sizes,
144 const float coordinates_offset = 1) {
145 float xmin1 = decoded_bbox[idx1 * 4 + 0];
146 float ymin1 = decoded_bbox[idx1 * 4 + 1];
147 float xmax1 = decoded_bbox[idx1 * 4 + 2];
148 float ymax1 = decoded_bbox[idx1 * 4 + 3];
150 float xmin2 = decoded_bbox[idx2 * 4 + 0];
151 float ymin2 = decoded_bbox[idx2 * 4 + 1];
152 float ymax2 = decoded_bbox[idx2 * 4 + 3];
153 float xmax2 = decoded_bbox[idx2 * 4 + 2];
155 if (xmin2 > xmax1 || xmax2 < xmin1 || ymin2 > ymax1 || ymax2 < ymin1) {
159 float intersect_xmin = std::max(xmin1, xmin2);
160 float intersect_ymin = std::max(ymin1, ymin2);
161 float intersect_xmax = std::min(xmax1, xmax2);
162 float intersect_ymax = std::min(ymax1, ymax2);
164 float intersect_width = intersect_xmax - intersect_xmin + coordinates_offset;
165 float intersect_height = intersect_ymax - intersect_ymin + coordinates_offset;
167 if (intersect_width <= 0 || intersect_height <= 0) {
171 float intersect_size = intersect_width * intersect_height;
172 float bbox1_size = bbox_sizes[idx1];
173 float bbox2_size = bbox_sizes[idx2];
175 return intersect_size / (bbox1_size + bbox2_size - intersect_size);
179 static void nms_cf(const float* conf_data,
186 const int pre_nms_topn,
187 const int post_nms_topn,
188 const float confidence_threshold,
189 const float nms_threshold) {
191 for (int i = 0; i < boxes_num; ++i) {
192 if (conf_data[i] > confidence_threshold) {
198 int num_output_scores = (pre_nms_topn == -1 ? count : std::min<int>(pre_nms_topn, count));
200 std::partial_sort_copy(indices, indices + count,
201 buffer, buffer + num_output_scores,
202 ConfidenceComparator(conf_data));
205 for (int i = 0; i < num_output_scores; ++i) {
206 const int idx = buffer[i];
209 for (int k = 0; k < detections; ++k) {
210 const int kept_idx = indices[k];
211 float overlap = JaccardOverlap(bboxes, sizes, idx, kept_idx);
212 if (overlap > nms_threshold) {
218 indices[detections] = idx;
223 detections = (post_nms_topn == -1 ? detections : std::min<int>(post_nms_topn, detections));
227 class ExperimentalDetectronDetectionOutputImpl: public ExtLayerBase {
229 const int INPUT_ROIS {0};
230 const int INPUT_DELTAS {1};
231 const int INPUT_SCORES {2};
232 const int INPUT_IM_INFO {3};
234 const int OUTPUT_BOXES {0};
235 const int OUTPUT_CLASSES {1};
236 const int OUTPUT_SCORES {2};
239 explicit ExperimentalDetectronDetectionOutputImpl(const CNNLayer* layer) {
241 score_threshold_ = layer->GetParamAsFloat("score_threshold");
242 nms_threshold_ = layer->GetParamAsFloat("nms_threshold");
243 max_delta_log_wh_ = layer->GetParamAsFloat("max_delta_log_wh");
244 classes_num_ = layer->GetParamAsInt("num_classes");
245 max_detections_per_class_ = layer->GetParamAsInt("post_nms_count");
246 max_detections_per_image_ = layer->GetParamAsInt("max_detections_per_image");
247 class_agnostic_box_regression_ = layer->GetParamAsBool("class_agnostic_box_regression", false);
248 deltas_weights_ = layer->GetParamAsFloats("deltas_weights");
250 std::vector<DataConfigurator> inputs_layouts(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
251 std::vector<DataConfigurator> outputs_layouts(layer->outData.size(), DataConfigurator(ConfLayout::PLN));
252 addConfig(layer, inputs_layouts, outputs_layouts);
253 } catch (InferenceEngine::details::InferenceEngineException &ex) {
254 errorMsg = ex.what();
258 StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
259 ResponseDesc *resp) noexcept override {
260 const int rois_num = inputs[INPUT_ROIS]->getTensorDesc().getDims()[0];
261 assert(classes_num_ == static_cast<int>(inputs[INPUT_SCORES]->getTensorDesc().getDims()[1]));
262 assert(4 * classes_num_ == static_cast<int>(inputs[INPUT_DELTAS]->getTensorDesc().getDims()[1]));
264 const auto* boxes = inputs[INPUT_ROIS]->buffer().as<const float *>();
265 const auto* deltas = inputs[INPUT_DELTAS]->buffer().as<const float *>();
266 const auto* scores = inputs[INPUT_SCORES]->buffer().as<const float *>();
267 const auto* im_info = inputs[INPUT_IM_INFO]->buffer().as<const float *>();
269 auto* output_boxes = outputs[OUTPUT_BOXES]->buffer().as<float *>();
270 auto* output_scores = outputs[OUTPUT_SCORES]->buffer().as<float *>();
271 auto* output_classes = outputs[OUTPUT_CLASSES]->buffer().as<float *>();
273 const float img_H = im_info[0];
274 const float img_W = im_info[1];
277 std::vector<float> refined_boxes(classes_num_ * rois_num * 4, 0);
278 std::vector<float> refined_scores(classes_num_ * rois_num, 0);
279 std::vector<float> refined_boxes_areas(classes_num_ * rois_num, 0);
280 Indexer refined_box_idx({classes_num_, rois_num, 4});
281 Indexer refined_score_idx({classes_num_, rois_num});
283 refine_boxes(boxes, deltas, &deltas_weights_[0], scores,
284 &refined_boxes[0], &refined_boxes_areas[0], &refined_scores[0],
285 rois_num, classes_num_,
290 // Apply NMS class-wise.
291 std::vector<int> buffer(rois_num, 0);
292 std::vector<int> indices(classes_num_ * rois_num, 0);
293 std::vector<int> detections_per_class(classes_num_, 0);
294 int total_detections_num = 0;
296 for (int class_idx = 1; class_idx < classes_num_; ++class_idx) {
297 nms_cf(&refined_scores[refined_score_idx({class_idx, 0})],
298 &refined_boxes[refined_box_idx({class_idx, 0, 0})],
299 &refined_boxes_areas[refined_score_idx({class_idx, 0})],
301 &indices[total_detections_num],
302 detections_per_class[class_idx],
305 max_detections_per_class_,
308 total_detections_num += detections_per_class[class_idx];
311 // Leave only max_detections_per_image_ detections.
312 // confidence, <class, index>
313 std::vector<std::pair<float, std::pair<int, int>>> conf_index_class_map;
315 int indices_offset = 0;
316 for (int c = 0; c < classes_num_; ++c) {
317 int n = detections_per_class[c];
318 for (int i = 0; i < n; ++i) {
319 int idx = indices[indices_offset + i];
320 float score = refined_scores[refined_score_idx({c, idx})];
321 conf_index_class_map.push_back(std::make_pair(score, std::make_pair(c, idx)));
326 assert(max_detections_per_image_ > 0);
327 if (total_detections_num > max_detections_per_image_) {
328 std::partial_sort(conf_index_class_map.begin(),
329 conf_index_class_map.begin() + max_detections_per_image_,
330 conf_index_class_map.end(),
331 SortScorePairDescend<std::pair<int, int>>);
332 conf_index_class_map.resize(max_detections_per_image_);
333 total_detections_num = max_detections_per_image_;
337 memset(output_boxes, 0, max_detections_per_image_ * 4 * sizeof(float));
338 memset(output_scores, 0, max_detections_per_image_ * sizeof(float));
339 memset(output_classes, 0, max_detections_per_image_ * sizeof(float));
342 for (const auto & detection : conf_index_class_map) {
343 float score = detection.first;
344 int cls = detection.second.first;
345 int idx = detection.second.second;
346 output_boxes[4 * i + 0] = refined_boxes[refined_box_idx({cls, idx, 0})];
347 output_boxes[4 * i + 1] = refined_boxes[refined_box_idx({cls, idx, 1})];
348 output_boxes[4 * i + 2] = refined_boxes[refined_box_idx({cls, idx, 2})];
349 output_boxes[4 * i + 3] = refined_boxes[refined_box_idx({cls, idx, 3})];
350 output_scores[i] = score;
351 output_classes[i] = static_cast<float>(cls);
359 float score_threshold_;
360 float nms_threshold_;
361 float max_delta_log_wh_;
363 int max_detections_per_class_;
364 int max_detections_per_image_;
365 bool class_agnostic_box_regression_;
366 std::vector<float> deltas_weights_;
371 REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
374 } // namespace Extensions
375 } // namespace InferenceEngine