2 * Copyright (c) 2021 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "DetectionPostProcessLayer.h"
19 #include "ndarray/Array.h"
37 using namespace ndarray;
39 using CenterSizeBox = DetectionPostProcessLayer::CenterSizeBox;
40 using CornerBox = DetectionPostProcessLayer::CornerBox;
42 using NonMaxSuppressionParam = DetectionPostProcessLayer::DetectionPostProcessParameters;
43 using Allocations = DetectionPostProcessLayer::Allocations;
// Constructor: wraps the caller-supplied output buffers in Array objects.
// coords, scores and classes are each given the 1-D shape {max_detections};
// num_selections is given the shape {1}.
// NOTE(review): presumably the Arrays do not take ownership of the buffers - confirm in ndarray/Array.h
47 OutputArrays(CornerBox *coords_buf, float *scores_buf, float *classes_buf,
48 int *num_selections_buf, size_t max_detections)
49 : coords(coords_buf, {max_detections}), scores(scores_buf, {max_detections}),
50 classes(classes_buf, {max_detections}), num_selections(num_selections_buf, {1})
// Output box coordinates, one CornerBox per detection slot
54 Array<CornerBox> coords;
// NOTE(review): the constructor also initialises `scores` and `classes`; their declarations are
// not visible in this listing
// Single-element array constructed over num_selections_buf
57 Array<int> num_selections;
60 struct TemporaryArrays
62 TemporaryArrays(int *selections_buffer, int max_detections)
63 : selections(selections_buffer, {static_cast<unsigned long>(max_detections)})
67 Array<int> selections;
70 // sort indices in decreasing order of first `k` scores
71 void PartialArgSort(const ContiguousSpan<float, true> &scores,
72 const ContiguousSpan<int, false> &indices, int k)
74 std::iota(indices.begin(), indices.begin() + k, 0);
75 std::partial_sort(indices.begin(), indices.begin() + k, indices.begin() + scores.size(),
76 [&scores](const int i, const int j) { return scores[i] > scores[j]; });
79 template <typename T> ContiguousSpan<T, false> static vecToSpan(std::vector<T> &v)
81 return ContiguousSpan<T, false>{v.begin(), v.end()};
// Converts the raw box tensor into an array of corner-format boxes.
//
// raw_boxes         - box tensor; dim(0) is read as the batch count and dim(1) as the box count
// raw_anchors       - anchor data, reinterpreted as `num_boxes` CenterSizeBox entries
// center_box_format - when false the raw data is returned reinterpreted as CornerBox, no decoding
// scales            - per-component divisors applied to the raw y/x/h/w values
//
// Returns an Array<const CornerBox> with shape {num_boxes}.
84 Array<const CornerBox> decodeBoxes(const Array<float> &raw_boxes, const Array<float> &raw_anchors,
85 bool center_box_format, const CenterSizeBox &scales)
87 auto nbatches = raw_boxes.shape().dim(0);
88 auto num_boxes = raw_boxes.shape().dim(1);
90 auto anchors = array_cast<const CenterSizeBox>(raw_anchors, {num_boxes});
// Not center-size encoded: hand the input storage back unchanged, viewed as CornerBox
92 if (!center_box_format)
94 auto boxes_p = reinterpret_cast<const CornerBox *>(raw_boxes.flat().data());
95 return {boxes_p, {num_boxes}};
99 // TODO support box center-width encoding correctly
101 auto boxes_p = reinterpret_cast<const CenterSizeBox *>(raw_boxes.flat().data());
102 Array<const CenterSizeBox> in_boxes{boxes_p, {num_boxes}};
// Storage for the decoded boxes.
// NOTE(review): allocated with new[]; where (or whether) it is released is not visible here -
// confirm the ownership semantics of Array / array_cast
104 auto decoded_boxes_p = new CornerBox[nbatches * num_boxes];
105 Array<CornerBox> decoded_boxes_a{decoded_boxes_p, {num_boxes}};
107 for (size_t i = 0; i < num_boxes; ++i)
109 auto anchor = anchors.at(i);
110 auto &box = decoded_boxes_a.at(i);
// Box centre: raw offset divided by the scale, multiplied by the anchor size, plus the anchor centre
111 float yc = in_boxes.at(i).y / scales.y * anchor.h + anchor.y;
112 float xc = in_boxes.at(i).x / scales.x * anchor.w + anchor.x;
// Half extents: exp(raw size / scale) times the anchor size, halved
113 float halfh = 0.5f * std::exp(in_boxes.at(i).h / scales.h) * anchor.h;
114 float halfw = 0.5f * std::exp(in_boxes.at(i).w / scales.w) * anchor.w;
// NOTE(review): the statements that write box.x1/x2/y1/y2 are not visible in this listing;
// presumably they are centre -/+ half extent - confirm
// Decoded boxes are expected to have strictly positive width and height
120 assert(box.x2 > box.x1);
121 assert(box.y2 > box.y1);
// The shape is read before decoded_boxes_a is moved from in the return statement
124 auto decoded_boxes_a_shape = decoded_boxes_a.shape();
126 return array_cast<const CornerBox>(std::move(decoded_boxes_a), decoded_boxes_a_shape);
// Computes the intersection-over-union of two corner-format boxes:
// intersection area / (area of box1 + area of box2 - intersection area).
130 float computeIOU(const CornerBox &box1, const CornerBox &box2)
132 float area_i = (box1.y2 - box1.y1) * (box1.x2 - box1.x1);
133 float area_j = (box2.y2 - box2.y1) * (box2.x2 - box2.x1);
// Degenerate box (non-positive area).
// NOTE(review): the body of this branch is not visible in this listing; presumably it returns 0
134 if (area_i <= 0 || area_j <= 0)
// Corners of the overlap rectangle
138 float in_ymin = std::max<float>(box1.y1, box2.y1);
139 float in_xmin = std::max<float>(box1.x1, box2.x1);
140 float in_ymax = std::min<float>(box1.y2, box2.y2);
141 float in_xmax = std::min<float>(box1.x2, box2.x2);
// Each extent is clamped at zero so that disjoint boxes yield an intersection area of zero
142 float in_area = std::max<float>(in_ymax - in_ymin, 0.0) * std::max<float>(in_xmax - in_xmin, 0.0);
144 return in_area / (area_i + area_j - in_area);
// Non-max suppression using a single score per box.
//
// boxes          - decoded boxes; dim(0) of the shape is used as the box count
// scores         - one score per box
// param          - supplies score_threshold and iou_threshold
// temps          - temps.selections receives the indices of the selected boxes
// max_detections - bound compared against the number of selections
//
// Returns selected_count (converted to int).
// NOTE(review): the bodies of the `if` statements and the update of selected_count are not
// visible in this listing; the comments below assume the usual skip / count / stop behaviour -
// confirm against the full source.
147 int doSingleClass(const Array<const CornerBox> &boxes, const std::vector<float> &scores,
148 const NonMaxSuppressionParam ¶m, TemporaryArrays &temps,
149 size_t max_detections)
151 auto num_boxes = boxes.shape().dim(0);
// Rank all boxes by decreasing score (k == num_boxes, so the whole range is sorted)
153 std::vector<int> sorted_box_indices(num_boxes);
154 PartialArgSort(ContiguousSpan<float, true>(scores.data(), num_boxes),
155 vecToSpan(sorted_box_indices), num_boxes);
157 // TODO move to temp allocations
// Per-box flag: 1 while the box is still a candidate, 0 once it has been suppressed
158 std::vector<int> process_box(num_boxes, 1);
160 size_t selected_count = 0;
161 for (size_t i = 0; i < num_boxes; ++i)
163 auto box_index = sorted_box_indices[i];
// Suppressed boxes and boxes scoring below the threshold are tested for here
165 if (!process_box[box_index] || scores[box_index] < param.score_threshold)
// Record this box as a selection
170 temps.selections.at(selected_count) = box_index;
// Check whether the detection budget has been reached
173 if (selected_count >= max_detections)
// Compare the current box with every lower-ranked box
178 for (size_t j = i + 1; j < num_boxes; ++j)
180 if (!process_box[sorted_box_indices[j]])
185 float IOU = computeIOU(boxes.at(box_index), boxes.at(sorted_box_indices[j]));
// Too much overlap with the current box: mark the lower-ranked box as suppressed
186 if (IOU > param.iou_threshold)
188 process_box[sorted_box_indices[j]] = 0;
193 return selected_count;
// Copies class, score and coordinates of the selected boxes into the output arrays.
//
// temporary          - temporary.selections holds the indices of the selected boxes
// decoded_boxes      - source of the box coordinates
// scores             - per-box score, indexed by box index
// num_selected       - number of selections to process
// output             - destination arrays (classes, scores, coords)
// sorted_classes     - 2-D array indexed as (box, rank) giving class indices
// detections_per_box - number of class entries emitted per selected box
196 void collectBoxes(TemporaryArrays &temporary, const Array<const CornerBox> &decoded_boxes,
197 std::vector<float> &scores, int num_selected, OutputArrays &output,
198 const Array<int> &sorted_classes, int detections_per_box)
200 auto &selections = temporary.selections;
// Position of the next output slot.
// NOTE(review): the statement that advances this counter is not visible in this listing
202 size_t output_box_count = 0;
204 for (int i = 0; i < num_selected; ++i)
// NOTE(review): selections is indexed with output_box_count rather than i; the two can only stay
// equal if the counter advances once per selection - confirm for detections_per_box > 1
206 int selected_box = selections.at(output_box_count);
208 for (int c = 0; c < detections_per_box; ++c)
// Class index is stored into the float `classes` output; score and box are copied as-is
210 output.classes.at(output_box_count) = sorted_classes.at(selected_box, c);
211 output.scores.at(output_box_count) = scores[selected_box];
212 output.coords.at(output_box_count) = decoded_boxes.at(selected_box);
// Top-level post-processing: ranks the class scores of every box, decodes the boxes, runs
// doSingleClass on the best score of each box, copies the selected detections into `outputs`
// and stores the selection count in num_selected_a.
//
// boxes_a        - raw box tensor; dim(1) is used as the number of boxes
// scores_a       - class scores; dim(2) is the class count including any background class
// num_selected_a - receives the number of selected boxes at index 0
// param          - layer parameters (class counts, thresholds, anchors, scales, ...)
// allocations    - supplies selections_buffer used as scratch storage
// outputs        - destination arrays for boxes, scores and classes
218 void DetectionPostProcess(const Array<float> &boxes_a, const Array<float> &scores_a,
219 Array<float> &num_selected_a, const NonMaxSuppressionParam ¶m,
220 const Allocations &allocations, OutputArrays &outputs)
222 TemporaryArrays temporary(allocations.selections_buffer, param.max_detections);
224 // Only batch of 1 is supported atm
225 auto num_boxes = boxes_a.shape().dim(1);
226 size_t num_classes = param.num_classes;
227 size_t num_classes_with_background = scores_a.shape().dim(2);
// A background class is assumed to be present whenever the score tensor has a different number
// of classes than param.num_classes
228 bool have_background = num_classes_with_background != num_classes;
230 size_t max_classes_per_box = std::min<size_t>(num_classes, param.max_classes_per_detection);
232 // TODO move this to allocations
233 std::vector<int> sorted_class_indices(num_boxes * num_classes);
// 2-D view {num_boxes, num_classes} over sorted_class_indices
235 Array<int> class_indices(sorted_class_indices.data(), {num_boxes, num_classes});
237 // TODO move to allocations
238 std::vector<float> max_scores(num_boxes);
240 for (size_t row = 0; row < num_boxes; row++)
// Scores of this box; the span is offset by one element when a background class is present
242 auto box_scores = scores_a.slice(0, row).offset(have_background ? 1 : 0);
243 auto indices = class_indices.slice(row);
// Order this box's class indices by decreasing score
245 PartialArgSort(box_scores, indices, num_classes);
// The first index after sorting refers to the best-scoring class of the box
247 max_scores[row] = box_scores[indices[0]];
// Anchors viewed as a {num_boxes, 4} float array.
// NOTE(review): the left-hand side of this statement is not visible in this listing; the next
// line uses the result under the name anchors_a
251 Array<float>(reinterpret_cast<float *>(param.anchors_input->buffer()), {num_boxes, 4});
252 auto decoded_boxes = decodeBoxes(boxes_a, anchors_a, param.center_box_format, param.scales);
// NOTE(review): the variable receiving this result is not visible in this listing; later lines
// use it under the name num_selected
255 doSingleClass(decoded_boxes, max_scores, param, temporary, param.max_detections);
257 collectBoxes(temporary, decoded_boxes, max_scores, num_selected, outputs, class_indices,
258 max_classes_per_box);
// The count is stored as a float
260 num_selected_a.at(0) = num_selected;
264 template <typename T> Array<T> toArray(uint8_t *ptr, std::vector<int32_t> &descr)
266 ndarray::Shape shape(descr.size());
267 for (size_t i = 0; i < descr.size(); ++i)
269 shape.dim(i) = descr[i];
272 return Array<T>{reinterpret_cast<T *>(ptr), shape};
275 void DetectionPostProcessLayer::configure(DetectionPostProcessParameters parameters)
277 _parameters = std::move(parameters);
278 _allocations.selections_buffer = new int[_parameters.max_detections * 2];
281 void DetectionPostProcessLayer::run()
283 auto nbatches = (unsigned int)_parameters.boxes_descr[0];
284 // no suport for batch other than 1( it's fine since tflite does not support
285 // batch for postprocess either )
286 assert(nbatches == 1);
288 auto boxes_a = toArray<float>(_parameters.boxes_input->buffer(), _parameters.boxes_descr);
289 auto scores_a = toArray<float>(_parameters.scores_input->buffer(), _parameters.scrores_descr);
291 auto num_selected_a = ndarray::Array<float>(
292 reinterpret_cast<float *>(_parameters.num_selections_output->buffer()), {nbatches});
294 OutputArrays outputArrays(reinterpret_cast<CornerBox *>(_parameters.box_coords_output->buffer()),
295 reinterpret_cast<float *>(_parameters.box_scores_output->buffer()),
296 reinterpret_cast<float *>(_parameters.box_classes_output->buffer()),
297 reinterpret_cast<int *>(_parameters.num_selections_output->buffer()),
298 _parameters.max_detections);
300 DetectionPostProcess(boxes_a, scores_a, num_selected_a, _parameters, _allocations, outputArrays);
303 DetectionPostProcessLayer::~DetectionPostProcessLayer() { delete[] _allocations.selections_buffer; }
307 } // namespace backend