1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
8 #include "../../op_cuda.hpp"
10 #include "../csl/stream.hpp"
11 #include "../csl/cudnn.hpp"
12 #include "../csl/tensor_ops.hpp"
14 #include "../kernels/region.hpp"
16 #include "../../nms.inl.hpp"
18 #include <opencv2/core.hpp>
24 namespace cv { namespace dnn { namespace cuda4dnn {
/* Selects how raw class scores are turned into probabilities.
 * SOFTMAX and SIGMOID are the two values this file dispatches on. */
26 enum class SquashMethod {
/* Settings bundle handed to the RegionOp constructor, which copies the fields
 * (squash_method, classes, boxes_per_cell, width_norm, height_norm,
 * object_prob_cutoff, class_prob_cutoff, nms_iou_threshold) into its own members. */
32 struct RegionConfiguration {
33 /* The image is divided into (H, W) cells.
35 * Each cell is interested in exactly one object and predicts `boxes_per_cell` bounding boxes
38 * Each bounding box contains:
40 * - objectness confidence score
41 * - `classes` number of class scores
43 * The object score is reduced to a probability using sigmoid and the class scores are reduced to
44 * probabilities by either applying sigmoid or softmax (which is a configuration option).
46 * object_prob = sigmoid(object_score)
47 * conditional_class_prob = sigmoid, softmax across all classes
49 * actual class probability = conditional_class_prob * object_prob
52 /* method for reducing class scores to probabilities */
53 SquashMethod squash_method;
/* `classes`: how many class scores each box carries;
 * `boxes_per_cell`: how many boxes are predicted for every cell */
55 std::size_t classes, boxes_per_cell;
/* RegionOp passes these on to kernels::region_finalize.
 * NOTE(review): presumably the sizes used to normalize box coordinates - confirm against the kernel */
57 std::size_t width_norm, height_norm;
59 /* prob cutoffs below which the prediction is nulled */
/* CUDABackendNode implementation for the region primitive. It keeps a bias
 * tensor plus the settings copied out of a RegionConfiguration. */
67 class RegionOp final : public CUDABackendNode {
/* wrapper type that the input and output BackendWrapper pointers are cast to */
69 using wrapper_type = GetCUDABackendWrapperType<T>;
/* Constructor.
 *   stream_ : moved into the `stream` member
 *   bias    : host matrix whose contents are copied into `biasTensor`
 *   config  : every setting is copied into the member of the matching name
 *             (`squash_method` is stored as `squash_type`)
 */
72 RegionOp(csl::Stream stream_, const cv::Mat& bias, const RegionConfiguration<V>& config)
73 : stream(std::move(stream_))
/* NOTE(review): makeTensorHeader presumably creates a tensor shaped like `bias` - confirm;
 * the copy that follows is issued on `stream` */
75 biasTensor = csl::makeTensorHeader<T>(bias);
76 csl::copyMatToTensor<T>(bias, biasTensor, stream);
78 classes = config.classes;
79 boxes_per_cell = config.boxes_per_cell;
81 width_norm = config.width_norm;
82 height_norm = config.height_norm;
84 squash_type = config.squash_method;
/* cutoffs are stored as T even though the configuration is parameterized on V */
86 object_prob_cutoff = config.object_prob_cutoff;
87 class_prob_cutoff = config.class_prob_cutoff;
89 nms_iou_threshold = config.nms_iou_threshold;
/* Parameter list and body of the overridden processing method. Steps, in order:
 *   1. csl::memcpy between the output span and the input view on `stream`
 *      (NOTE(review): destination-first, i.e. input copied into output - confirm)
 *   2. softmax_strided or sigmoid_strided, chosen by `squash_type`
 *   3. kernels::region_finalize with the bias tensor, cutoffs and grid dimensions
 *   4. when `nms_iou_threshold > 0`, do_nms_sort on the host matrix for every sample
 * `workspace` is not referenced by any statement in this body.
 */
93 const std::vector<cv::Ptr<BackendWrapper>>& inputs,
94 const std::vector<cv::Ptr<BackendWrapper>>& outputs,
95 csl::Workspace& workspace) override
/* NOTE(review): only the output count is checked; inputs[0] is used below without a size check */
97 CV_Assert(outputs.size() == 1);
99 auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
100 auto input = input_wrapper->getView();
102 auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
103 auto output = output_wrapper->getSpan();
105 csl::memcpy<T>(output.get(), input.get(), output.size(), stream);
/* axis 0 is iterated as the sample index further down; axes 1 and 2 give the cell grid */
107 auto rows = input.get_axis_size(1);
108 auto cols = input.get_axis_size(2);
/* floats per box: 4 box values + 1 objectness score + one score per class
 * (do_nms_sort reads offsets 0-3 as the box and starts class scores at offset 5) */
110 auto cell_box_size = classes + 4 + 1;
112 /* we squash class scores into probabilities using softmax or sigmoid */
/* NOTE(review): the literal 5 presumably is the offset of the first class score inside a box - confirm */
113 if (squash_type == SquashMethod::SOFTMAX)
114 kernels::softmax_strided<T>(stream, output, input, classes, cell_box_size, 5);
115 else if (squash_type == SquashMethod::SIGMOID)
116 kernels::sigmoid_strided<T>(stream, output, input, classes, cell_box_size, 5);
118 kernels::region_finalize<T>(stream, output, input, biasTensor, object_prob_cutoff, class_prob_cutoff,
119 height_norm, width_norm, rows, cols, boxes_per_cell, cell_box_size, classes);
/* NMS is optional and runs on the host copy of the output */
121 if (nms_iou_threshold > 0) {
122 auto output_mat = output_wrapper->getMutableHostMat();
/* the data pointer is reinterpreted as float* below, hence the CV_32F requirement */
123 CV_Assert(output_mat.type() == CV_32F);
124 for (int i = 0; i < input.get_axis_size(0); i++) {
/* NOTE(review): sample_size does not depend on i and could be computed once before the loop */
125 auto sample_size = rows * cols * boxes_per_cell * cell_box_size;
/* one call per sample: rows * cols * boxes_per_cell boxes, class_prob_cutoff as the score threshold */
126 do_nms_sort(reinterpret_cast<float*>(output_mat.data) + i * sample_size, rows * cols * boxes_per_cell, class_prob_cutoff, nms_iou_threshold);
132 void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
134 std::vector<Rect2d> boxes(total);
135 std::vector<float> scores(total);
137 for (int i = 0; i < total; ++i)
139 Rect2d &b = boxes[i];
140 int box_index = i * (classes + 4 + 1);
141 b.width = detections[box_index + 2];
142 b.height = detections[box_index + 3];
143 b.x = detections[box_index + 0] - b.width / 2;
144 b.y = detections[box_index + 1] - b.height / 2;
147 std::vector<int> indices;
148 for (int k = 0; k < classes; ++k)
150 for (int i = 0; i < total; ++i)
152 int box_index = i * (classes + 4 + 1);
153 int class_index = box_index + 5;
154 scores[i] = detections[class_index + k];
155 detections[class_index + k] = 0;
157 NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
158 for (int i = 0, n = indices.size(); i < n; ++i)
160 int box_index = indices[i] * (classes + 4 + 1);
161 int class_index = box_index + 5;
162 detections[class_index + k] = scores[indices[i]];
/* bias values copied from the constructor's `bias` matrix; passed to kernels::region_finalize */
170 csl::Tensor<T> biasTensor;
/* class scores per box / boxes per cell, copied from the configuration */
171 std::size_t classes, boxes_per_cell;
/* copied from the configuration and forwarded to kernels::region_finalize */
172 std::size_t width_norm, height_norm;
/* chooses between the softmax and sigmoid kernels */
173 SquashMethod squash_type;
/* both cutoffs go to kernels::region_finalize; class_prob_cutoff is also the NMS score threshold */
175 T object_prob_cutoff, class_prob_cutoff;
179 }}} /* namespace cv::dnn::cuda4dnn */
181 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP */