1 // Copyright (C) 2019 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
5 // There are some code snippets in this file.
6 // Original source file is available here (Copyright (c) 2018 Facebook, MIT License):
7 // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp
10 #include "ext_list.hpp"
11 #include "ext_base.hpp"
17 #include "ie_parallel.hpp"
19 namespace InferenceEngine {
20 namespace Extensions {
23 // implementation taken from Caffe2
// Precomputes bilinear-interpolation data for the sampling points of every
// pooled bin and stores one PreCalc entry per sampling point in `pre_calc`.
// The loop nest is (ph, pw, iy, ix): pooled row, pooled column, then the
// sampling-grid row/column inside that bin.
// NOTE(review): most of the parameter list and several statements of this
// function are not visible in this view; the comments below only describe the
// statements that are shown.
37 void pre_calc_for_bilinear_interpolate(
40 const int pooled_height,
41 const int pooled_width,
50 std::vector<PreCalc<T>>& pre_calc) {
// Index of the entry of `pre_calc` that is written next.
51 int pre_calc_index = 0;
52 for (int ph = 0; ph < pooled_height; ph++) {
53 for (int pw = 0; pw < pooled_width; pw++) {
54 for (int iy = 0; iy < iy_upper; iy++) {
// Vertical sample coordinate: top of bin `ph` plus the centre (iy + 0.5) of
// one of roi_bin_grid_h equal slices of the bin height.
55 const T yy = roi_start_h + ph * bin_size_h +
56 static_cast<T>(iy + .5f) * bin_size_h /
57 static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
58 for (int ix = 0; ix < ix_upper; ix++) {
// Horizontal sample coordinate, built the same way from the bin width.
59 const T xx = roi_start_w + pw * bin_size_w +
60 static_cast<T>(ix + .5f) * bin_size_w /
61 static_cast<T>(roi_bin_grid_w);
// NOTE(review): `x`/`y` are defined on lines not visible here - presumably
// working copies of xx/yy; confirm.
65 // deal with: inverse elements are out of feature map boundary
66 if (y < -1.0 || y > height || x < -1.0 || x > width) {
// Stores the entry for an out-of-range sample; `.at()` is bounds-checked.
// NOTE(review): the contents of `pc` for this case are set on lines not
// visible here (presumably zero weights/positions).
77 pre_calc.at(pre_calc_index) = pc;
// Truncate the sample coordinate to get the upper-left neighbour.
89 int y_low = static_cast<int>(y);
90 int x_low = static_cast<int>(x);
// On or past the last row: collapse both vertical neighbours onto it.
94 if (y_low >= height - 1) {
95 y_high = y_low = height - 1;
// On or past the last column: collapse both horizontal neighbours onto it.
101 if (x_low >= width - 1) {
102 x_high = x_low = width - 1;
// Bilinear weights of the four neighbours from the fractional offsets
// ly/lx (defined on lines not visible here) and their complements hy/hx.
110 T hy = static_cast<T>(1) - ly, hx = static_cast<T>(1) - lx;
111 T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
113 // save weights and indices
// Positions are flattened as y * width + x.
115 pc.pos1 = y_low * width + x_low;
116 pc.pos2 = y_low * width + x_high;
117 pc.pos3 = y_high * width + x_low;
118 pc.pos4 = y_high * width + x_high;
123 pre_calc[pre_calc_index] = pc;
// ROIAlign forward pass on the CPU. For every ROI (processed via parallel_for)
// and every channel, each pooled output element accumulates the
// bilinearly-weighted feature-map values of the sampling points in its bin
// and the result is written to `top_data`.
// NOTE(review): part of the parameter list (nthreads, channels, height, width,
// roi_cols, top_data, ...) and some statements are not visible in this view;
// the comments below only describe the statements that are shown.
132 template <typename T>
133 void ROIAlignForward_cpu_kernel(
135 const T* bottom_data,
136 const T& spatial_scale,
140 const int pooled_height,
141 const int pooled_width,
142 const int sampling_ratio,
143 const T* bottom_rois,
// nthreads is divided by the per-ROI output size
// (channels * pooled_width * pooled_height) to recover the number of ROIs.
147 int n_rois = nthreads / channels / pooled_width / pooled_height;
148 // (n, c, ph, pw) is an element in the pooled output
149 parallel_for(n_rois, [&](size_t n) {
// Offset of the first output element that belongs to ROI n.
150 int index_n = n * channels * pooled_width * pooled_height;
152 // roi could have 4 or 5 columns
153 const T* offset_bottom_rois = bottom_rois + n * roi_cols;
154 int roi_batch_ind = 0;
// NOTE(review): the condition guarding the next two statements is not
// visible - presumably they run only for 5-column ROIs, where column 0 is
// read as the batch index and then skipped.
156 roi_batch_ind = static_cast<int>(offset_bottom_rois[0]);
157 offset_bottom_rois++;
160 // Do not use rounding; this implementation detail is critical
// ROI corners are multiplied by spatial_scale.
161 T roi_start_w = offset_bottom_rois[0] * spatial_scale;
162 T roi_start_h = offset_bottom_rois[1] * spatial_scale;
163 T roi_end_w = offset_bottom_rois[2] * spatial_scale;
164 T roi_end_h = offset_bottom_rois[3] * spatial_scale;
166 // Force malformed ROIs to be 1x1
167 T roi_width = std::max(roi_end_w - roi_start_w, (T)1.);
168 T roi_height = std::max(roi_end_h - roi_start_h, (T)1.);
// Size of a single pooled bin.
169 T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
170 T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
172 // We use roi_bin_grid to sample the grid and mimic integral
// Samples per bin along each axis: sampling_ratio when it is positive,
// otherwise ceil(roi size / pooled size).
173 int roi_bin_grid_h = (sampling_ratio > 0)
175 : static_cast<int>(ceil(roi_height / pooled_height)); // e.g., = 2
177 (sampling_ratio > 0) ? sampling_ratio : static_cast<int>(ceil(roi_width / pooled_width));
179 // We do average (integral) pooling inside a bin
180 const T count = static_cast<T>(roi_bin_grid_h * roi_bin_grid_w); // e.g. = 4
182 // we want to precalculate indices and weights shared by all channels,
183 // this is the key point of optimization
// One PreCalc entry per (pooled bin, sampling point) pair.
184 std::vector<PreCalc<T>> pre_calc(
185 roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
186 pre_calc_for_bilinear_interpolate(
201 for (int c = 0; c < channels; c++) {
// Offset of the first output element for (ROI n, channel c).
202 int index_n_c = index_n + c * pooled_width * pooled_height;
// Start of the height x width plane for (roi_batch_ind, c).
203 const T* offset_bottom_data =
204 bottom_data + (roi_batch_ind * channels + c) * height * width;
// The precomputed entries are consumed with a fresh index for every channel.
205 int pre_calc_index = 0;
207 for (int ph = 0; ph < pooled_height; ph++) {
208 for (int pw = 0; pw < pooled_width; pw++) {
209 int index = index_n_c + ph * pooled_width + pw;
212 for (int iy = 0; iy < roi_bin_grid_h; iy++) {
213 for (int ix = 0; ix < roi_bin_grid_w; ix++) {
// Accumulate the weighted sum of the four neighbours of this sampling point.
214 PreCalc<T> pc = pre_calc[pre_calc_index];
215 output_val += pc.w1 * offset_bottom_data[pc.pos1] +
216 pc.w2 * offset_bottom_data[pc.pos2] +
217 pc.w3 * offset_bottom_data[pc.pos3] +
218 pc.w4 * offset_bottom_data[pc.pos4];
// NOTE(review): the initialisation of output_val, the pre_calc_index
// increment and any normalisation by `count` are on lines not visible here.
225 top_data[index] = output_val;
// Assigns every ROI to a feature-pyramid level according to its size.
//
// For a ROI with positive area the level is
//     floor(canonical_level + log2(sqrt(area) / canonical_scale + 1e-6)),
// clamped to [0, levels_num - 1], with canonical_scale = 224 and
// canonical_level = 2. A ROI whose area is not positive (degenerate, inverted
// or NaN coordinates) keeps the out-of-range id `levels_num`, so callers have
// to reserve one extra bucket for such ROIs.
//
// rois       - num_rois boxes stored back to back as [x0, y0, x1, y1]
// level_ids  - output, one level id per ROI (num_rois elements)
// num_rois   - number of boxes in `rois`
// levels_num - number of pyramid levels
void redistribute_rois(const float* rois, int* level_ids,
                       const int num_rois, const int levels_num) {
    const float canonical_scale = 224.0f;
    const int canonical_level = 2;

    for (int i = 0; i < num_rois; ++i) {
        const float x0 = rois[4 * i + 0];
        const float y0 = rois[4 * i + 1];
        const float x1 = rois[4 * i + 2];
        const float y1 = rois[4 * i + 3];

        // Id used for ROIs that cannot be mapped to a real level.
        int target_level = levels_num;
        const float area = (x1 - x0) * (y1 - y0);
        // sqrt/log2 of a non-positive area gives NaN, and converting NaN to
        // int is undefined behaviour, so only positive areas are mapped.
        // The comparison is also false for a NaN area.
        if (area > 0) {
            const float relative_scale = std::sqrt(area) / canonical_scale;
            float level = std::floor(std::log2(relative_scale + 1e-6f) + canonical_level);
            // Clamp while still in floating point so that an infinite value
            // is never converted to int.
            level = std::max(0.0f, std::min(static_cast<float>(levels_num - 1), level));
            target_level = static_cast<int>(level);
        }

        level_ids[i] = target_level;
    }
}
// Copies `n` fixed-size records from `src_data` to `dst_data`, ordered by
// ascending rank.
//
// src_data    - n records of `step` floats each
// ranks       - n sort keys, ranks[i] belongs to source record i
// n           - number of records
// step        - number of floats per record
// dst_data    - output, n * step floats; record i is source record dst_mapping[i]
// dst_mapping - output, n ints; the permutation that was applied
//
// Records with equal rank keep their relative source order (stable sort), so
// the result does not depend on the standard library's sort implementation.
// Passing a permutation produced by this function as `ranks` applies the
// inverse permutation.
void reorder(const float* src_data, const int* ranks, const int n, const int step, float* dst_data,
             int* dst_mapping) {
    if (n <= 0) {
        return;
    }

    std::iota(dst_mapping, dst_mapping + n, 0);
    std::stable_sort(dst_mapping, dst_mapping + n,
                     [ranks](const int lhs, const int rhs) { return ranks[lhs] < ranks[rhs]; });

    const size_t record_size = static_cast<size_t>(step);
    for (int i = 0; i < n; ++i) {
        const int j = dst_mapping[i];
        assert(0 <= j && j < n);
        // Offsets are computed in size_t so that index * step cannot overflow int.
        std::memcpy(dst_data + static_cast<size_t>(i) * record_size,
                    src_data + static_cast<size_t>(j) * record_size,
                    sizeof(float) * record_size);
    }
}
// Builds the bucket boundaries for ROIs grouped by level id.
//
// ids            - level id of every ROI, each expected in [0, levels_num)
// rois_per_level - output; afterwards holds levels_num + 1 values where
//                  element k is the number of ids smaller than k, i.e. bucket
//                  k covers [rois_per_level[k], rois_per_level[k + 1])
// levels_num     - number of buckets
void split_points(const std::vector<int>& ids, std::vector<int>& rois_per_level, const int levels_num) {
    // Histogram of the level ids.
    rois_per_level.assign(levels_num, 0);
    for (const int level : ids) {
        assert(0 <= level && level < levels_num);
        ++rois_per_level[level];
    }

    // Turn the histogram into an inclusive running total.
    int running_total = 0;
    for (int level = 0; level < levels_num; ++level) {
        running_total += rois_per_level[level];
        rois_per_level[level] = running_total;
    }

    // A leading zero makes element k the start offset of bucket k.
    rois_per_level.insert(rois_per_level.begin(), 0);
}
// Groups ROIs by level id, keeping the original order inside every level.
//
// rois           - rois_num boxes of 4 floats each
// ids            - level id of every ROI, each expected in [0, levels_num)
// mapping        - not used by this implementation
// rois_num       - number of ROIs
// reordered_rois - output, rois_num boxes of 4 floats grouped by level
// rois_per_level - output, levels_num + 1 offsets; level k occupies boxes
//                  [rois_per_level[k], rois_per_level[k + 1])
// levels_num     - number of levels
void reorder_rois(const float *rois, const int* ids, int* mapping, const int rois_num,
                  float * reordered_rois, std::vector<int>& rois_per_level, const int levels_num) {
    // Count the ROIs of every level.
    rois_per_level.assign(levels_num, 0);
    for (int roi = 0; roi < rois_num; ++roi) {
        assert(0 <= ids[roi] && ids[roi] < levels_num);
        ++rois_per_level[ids[roi]];
    }

    // Counts -> inclusive running totals -> start offsets (leading zero).
    int running_total = 0;
    for (int level = 0; level < levels_num; ++level) {
        running_total += rois_per_level[level];
        rois_per_level[level] = running_total;
    }
    rois_per_level.insert(rois_per_level.begin(), 0);

    // next_slot[level] is the output position of the next ROI of that level.
    std::vector<int> next_slot(rois_per_level);

    for (int roi = 0; roi < rois_num; ++roi) {
        const int level = ids[roi];
        assert(level < levels_num);
        int& slot = next_slot[level];
        assert(0 <= slot && slot < rois_num);

        const float* src_box = rois + roi * 4;
        float* dst_box = reordered_rois + slot * 4;
        for (int coord = 0; coord < 4; ++coord) {
            dst_box[coord] = src_box[coord];
        }
        ++slot;
    }
}
// Layer implementation: takes a list of ROIs plus one feature map per pyramid
// level, assigns every ROI to a level, runs ROIAlign for each level on that
// level's feature map and returns the pooled features in the original ROI
// order. If a second output exists, the input ROIs are copied to it unchanged.
// NOTE(review): some lines of this class (try block opening, return
// statements, access specifiers, the output_dim_ declaration, part of the
// kernel argument list) are not visible in this view.
311 class ExperimentalDetectronROIFeatureExtractorImpl: public ExtLayerBase {
// Input 0 holds the ROIs; inputs from index 1 on are the feature maps.
313 const int INPUT_ROIS {0};
314 const int INPUT_FEATURES_START {1};
// Output 0 holds the pooled ROI features; output 1 (optional) the ROIs.
316 const int OUTPUT_ROI_FEATURES {0};
317 const int OUTPUT_ROIS {1};
// Reads the layer parameters "output_size", "pyramid_scales" and
// "sampling_ratio" and registers a configuration with planar layout for all
// inputs and outputs. An InferenceEngineException is caught and its message
// stored in errorMsg.
320 explicit ExperimentalDetectronROIFeatureExtractorImpl(const CNNLayer* layer) {
322 output_dim_ = layer->GetParamAsInt("output_size");
323 pyramid_scales_ = layer->GetParamAsInts("pyramid_scales");
324 sampling_ratio_ = layer->GetParamAsInt("sampling_ratio");
// The pooled output is square: output_size x output_size.
325 pooled_height_ = output_dim_;
326 pooled_width_ = output_dim_;
328 std::vector<DataConfigurator> inputs_layouts(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
329 std::vector<DataConfigurator> outputs_layouts(layer->outData.size(), DataConfigurator(ConfLayout::PLN));
330 addConfig(layer, inputs_layouts, outputs_layouts);
331 } catch (InferenceEngine::details::InferenceEngineException &ex) {
332 errorMsg = ex.what();
// Runs the layer on the given blobs.
336 StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
337 ResponseDesc *resp) noexcept override {
// Every input after the ROI blob is one pyramid level.
338 const int levels_num = inputs.size() - INPUT_FEATURES_START;
339 const int num_rois = inputs[INPUT_ROIS]->getTensorDesc().getDims()[0];
// The channel count is taken from the first feature map only.
// NOTE(review): assumes all levels share this channel count - confirm.
340 const int channels_num = inputs[INPUT_FEATURES_START]->getTensorDesc().getDims()[1];
// Number of output values produced per ROI.
341 const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num;
343 auto *input_rois = inputs[INPUT_ROIS]->buffer().as<const float *>();
344 auto *output_rois_features = outputs[OUTPUT_ROI_FEATURES]->buffer().as<float *>();
// The ROI output is optional; stays nullptr when the layer has one output.
345 float *output_rois = nullptr;
346 if (OUTPUT_ROIS < static_cast<int>(outputs.size())) {
347 output_rois = outputs[OUTPUT_ROIS]->buffer().as<float *>();
// Step 1: pick a pyramid level for every ROI.
// NOTE(review): &level_ids[0] (and the other &vec[0] below) is applied to
// empty vectors when num_rois == 0 - confirm that case cannot occur.
350 std::vector<int> level_ids(num_rois, 0);
351 redistribute_rois(input_rois, reinterpret_cast<int *>(&level_ids[0]), num_rois, levels_num);
// Step 2: sort the ROIs by level id; original_rois_mapping receives the
// applied permutation (sorted position -> original index).
353 std::vector<float> reordered_rois(4 * num_rois, 0);
354 std::vector<int> original_rois_mapping(num_rois, 0);
355 reorder(input_rois, &level_ids[0], num_rois, 4, &reordered_rois[0], &original_rois_mapping[0]);
// Step 3: start offset of every level inside the sorted ROI list. One extra
// bucket (levels_num + 1) is requested; redistribute_rois initialises
// target_level to levels_num, so presumably that bucket collects ROIs left
// at that value - such ROIs are not processed by the loop below and keep
// zero features.
357 std::vector<int> rois_per_level;
358 split_points(level_ids, rois_per_level, levels_num + 1);
// Step 4: ROIAlign per level, written in sorted-ROI order.
360 std::vector<float> output_rois_features_temp(feaxels_per_roi * num_rois, 0);
361 for (int i = 0; i < levels_num; ++i) {
362 const int level_rois_offset = rois_per_level[i];
363 const int level_rois_num = rois_per_level[i + 1] - level_rois_offset;
364 if (level_rois_num > 0) {
365 auto *featuremap = inputs[INPUT_FEATURES_START + i]->buffer().as<const float *>();
366 const int featuremap_height = inputs[INPUT_FEATURES_START + i]->getTensorDesc().getDims()[2];
367 const int featuremap_width = inputs[INPUT_FEATURES_START + i]->getTensorDesc().getDims()[3];
// The spatial scale passed to the kernel is the reciprocal of this level's
// pyramid scale.
// NOTE(review): no visible check that pyramid_scales_ has at least
// levels_num entries - confirm.
368 ROIAlignForward_cpu_kernel<float>(feaxels_per_roi * level_rois_num,
370 1.0f / pyramid_scales_[i],
377 &reordered_rois[4 * level_rois_offset],
378 &output_rois_features_temp[feaxels_per_roi * level_rois_offset]);
// Step 5: using the stored permutation as sort keys moves every feature
// block back to the position of its original ROI; the permutation produced
// by this second call is not needed (dummy_mapping).
382 std::vector<int> dummy_mapping(num_rois, 0);
383 reorder(&output_rois_features_temp[0], &original_rois_mapping[0], num_rois, feaxels_per_roi,
384 output_rois_features, &dummy_mapping[0]);
// Step 6: pass the input ROIs through to the optional second output.
385 if (output_rois != nullptr) {
386 std::memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
// Pooled output size; both are set from "output_size" in the constructor.
394 int pooled_height_ = 0;
395 int pooled_width_ = 0;
// Value of the "pyramid_scales" parameter, indexed by pyramid level.
396 std::vector<int> pyramid_scales_;
// Value of the "sampling_ratio" parameter, forwarded to the ROIAlign kernel.
397 int sampling_ratio_ = 0;
// Registers ImplFactory<ExperimentalDetectronROIFeatureExtractorImpl> for the
// ExperimentalDetectronROIFeatureExtractor layer name.
// NOTE(review): REG_FACTORY_FOR is defined outside this file - presumably in
// ext_list.hpp / ext_base.hpp; confirm its exact registration semantics there.
409 REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronROIFeatureExtractorImpl>, ExperimentalDetectronROIFeatureExtractor);
412 } // namespace Extensions
413 } // namespace InferenceEngine