Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / region.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../csl/cudnn.hpp"
12 #include "../csl/tensor_ops.hpp"
13
14 #include "../kernels/region.hpp"
15
16 #include "../../nms.inl.hpp"
17
18 #include <opencv2/core.hpp>
19
20 #include <cstddef>
21 #include <utility>
22 #include <vector>
23
24 namespace cv { namespace dnn { namespace cuda4dnn {
25
26     enum class SquashMethod {
27         SOFTMAX,
28         SIGMOID
29     };
30
31     template <class T>
32     struct RegionConfiguration {
33         /* The image is divided into (H, W) cells.
34          *
35          * Each cell is interested in exactly one object and predicts `boxes_per_cell` bounding boxes
36          * for that object.
37          *
38          * Each bounding box contains:
39          * - 4 box coordinates
40          * - objectness confidence score
41          * - `classes` number of class scores
42          *
43          * The object score is reduced to a probability using sigmoid and the class scores are reduced to
44          * probabilities by either applying sigmoid or softmax (which is a configuration option).
45          *
46          * object_prob = sigmoid(object_score)
47          * conditional_class_prob = sigmoid, softmax across all classes
48          *
49          * actual class probability = conditional_class_prob * object_prob
50          */
51
52         /* method for reducing class scores to probabilities */
53         SquashMethod squash_method;
54
55         std::size_t classes, boxes_per_cell;
56
57         std::size_t width_norm, height_norm;
58
59         /* prob cutoffs below which the prediction is nulled */
60         T object_prob_cutoff;
61         T class_prob_cutoff;
62
63         T nms_iou_threshold;
64     };
65
66     template <class T>
67     class RegionOp final : public CUDABackendNode {
68     public:
69         using wrapper_type = GetCUDABackendWrapperType<T>;
70
71         template <class V>
72         RegionOp(csl::Stream stream_, const cv::Mat& bias, const RegionConfiguration<V>& config)
73             : stream(std::move(stream_))
74         {
75             biasTensor = csl::makeTensorHeader<T>(bias);
76             csl::copyMatToTensor<T>(bias, biasTensor, stream);
77
78             classes = config.classes;
79             boxes_per_cell = config.boxes_per_cell;
80
81             width_norm = config.width_norm;
82             height_norm = config.height_norm;
83
84             squash_type = config.squash_method;
85
86             object_prob_cutoff = config.object_prob_cutoff;
87             class_prob_cutoff = config.class_prob_cutoff;
88
89             nms_iou_threshold = config.nms_iou_threshold;
90         }
91
92         void forward(
93             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
94             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
95             csl::Workspace& workspace) override
96         {
97             CV_Assert(outputs.size() == 1);
98
99             auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
100             auto input = input_wrapper->getView();
101
102             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
103             auto output = output_wrapper->getSpan();
104
105             csl::memcpy<T>(output.get(), input.get(), output.size(), stream);
106
107             auto rows = input.get_axis_size(1);
108             auto cols = input.get_axis_size(2);
109
110             auto cell_box_size = classes + 4 + 1;
111
112             /* we squash class scores into probabilities using softmax or sigmoid */
113             if (squash_type == SquashMethod::SOFTMAX)
114                 kernels::softmax_strided<T>(stream, output, input, classes, cell_box_size, 5);
115             else if (squash_type == SquashMethod::SIGMOID)
116                 kernels::sigmoid_strided<T>(stream, output, input, classes, cell_box_size, 5);
117
118             kernels::region_finalize<T>(stream, output, input, biasTensor, object_prob_cutoff, class_prob_cutoff,
119                 height_norm, width_norm, rows, cols, boxes_per_cell, cell_box_size, classes);
120
121             if (nms_iou_threshold > 0) {
122                 auto output_mat = output_wrapper->getMutableHostMat();
123                 CV_Assert(output_mat.type() == CV_32F);
124                 for (int i = 0; i < input.get_axis_size(0); i++) {
125                     auto sample_size = rows * cols * boxes_per_cell * cell_box_size;
126                     do_nms_sort(reinterpret_cast<float*>(output_mat.data) + i * sample_size, rows * cols * boxes_per_cell, class_prob_cutoff, nms_iou_threshold);
127                 }
128             }
129         }
130
131     private:
132         void do_nms_sort(float *detections, int total, float score_thresh, float nms_thresh)
133         {
134             std::vector<Rect2d> boxes(total);
135             std::vector<float> scores(total);
136
137             for (int i = 0; i < total; ++i)
138             {
139                 Rect2d &b = boxes[i];
140                 int box_index = i * (classes + 4 + 1);
141                 b.width = detections[box_index + 2];
142                 b.height = detections[box_index + 3];
143                 b.x = detections[box_index + 0] - b.width / 2;
144                 b.y = detections[box_index + 1] - b.height / 2;
145             }
146
147             std::vector<int> indices;
148             for (int k = 0; k < classes; ++k)
149             {
150                 for (int i = 0; i < total; ++i)
151                 {
152                     int box_index = i * (classes + 4 + 1);
153                     int class_index = box_index + 5;
154                     scores[i] = detections[class_index + k];
155                     detections[class_index + k] = 0;
156                 }
157                 NMSBoxes(boxes, scores, score_thresh, nms_thresh, indices);
158                 for (int i = 0, n = indices.size(); i < n; ++i)
159                 {
160                     int box_index = indices[i] * (classes + 4 + 1);
161                     int class_index = box_index + 5;
162                     detections[class_index + k] = scores[indices[i]];
163                 }
164             }
165         }
166
167     private:
168         csl::Stream stream;
169
170         csl::Tensor<T> biasTensor;
171         std::size_t classes, boxes_per_cell;
172         std::size_t width_norm, height_norm;
173         SquashMethod squash_type;
174
175         T object_prob_cutoff, class_prob_cutoff;
176         T nms_iou_threshold;
177     };
178
179 }}} /* namespace cv::dnn::cuda4dnn */
180
181 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REGION_HPP */