Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / prior_box.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PRIOR_BOX_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PRIOR_BOX_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../csl/span.hpp"
12 #include "../csl/tensor.hpp"
13
14 #include "../kernels/prior_box.hpp"
15
16 #include <cstddef>
17 #include <vector>
18 #include <utility>
19
20 namespace cv { namespace dnn { namespace cuda4dnn {
21
/* Plain parameter bundle consumed by PriorBoxOp's constructor.
 *
 * The struct has no invariants of its own; PriorBoxOp asserts that
 * `box_widths`/`box_heights` have equal length and that
 * `offsets_x`/`offsets_y` have equal length.
 */
struct PriorBoxConfiguration {
    /* feature map dimensions */
    std::size_t feature_map_width;
    std::size_t feature_map_height;

    /* image dimensions */
    std::size_t image_width;
    std::size_t image_height;

    /* per-feature-point prior box parameters: box sizes ... */
    std::vector<float> box_widths;
    std::vector<float> box_heights;

    /* ... box offsets ... */
    std::vector<float> offsets_x;
    std::vector<float> offsets_y;

    /* ... and the step along each axis */
    float stepX;
    float stepY;

    /* variance values, forwarded unchanged to the prior box kernel */
    std::vector<float> variance;

    /* how many priors are generated for each feature point */
    std::size_t num_priors;

    /* when set, box coordinates are clamped to the [0, 1] range */
    bool clip;

    /* when set, box coordinates are normalized using the image dimensions */
    bool normalize;
};
42
43     template <class T>
44     class PriorBoxOp final : public CUDABackendNode {
45     public:
46         using wrapper_type = GetCUDABackendWrapperType<T>;
47
48         PriorBoxOp(csl::Stream stream_, const PriorBoxConfiguration& config)
49             : stream(std::move(stream_))
50         {
51             feature_map_width = config.feature_map_width;
52             feature_map_height = config.feature_map_height;
53
54             image_width = config.image_width;
55             image_height = config.image_height;
56
57             const auto& box_widths = config.box_widths;
58             const auto& box_heights = config.box_heights;
59             CV_Assert(box_widths.size() == box_heights.size());
60
61             box_size = box_widths.size();
62
63             const auto& offsets_x = config.offsets_x;
64             const auto& offsets_y = config.offsets_y;
65             CV_Assert(offsets_x.size() == offsets_y.size());
66
67             offset_size = offsets_x.size();
68
69             /* for better memory utilization and preassumably better cache performance, we merge
70              * the four vectors and put them in a single tensor
71              */
72             auto total = box_widths.size() * 2 + offsets_x.size() * 2;
73             std::vector<float> merged_params;
74             merged_params.insert(std::end(merged_params), std::begin(box_widths), std::end(box_widths));
75             merged_params.insert(std::end(merged_params), std::begin(box_heights), std::end(box_heights));
76             merged_params.insert(std::end(merged_params), std::begin(offsets_x), std::end(offsets_x));
77             merged_params.insert(std::end(merged_params), std::begin(offsets_y), std::end(offsets_y));
78             CV_Assert(merged_params.size() == total);
79
80             paramsTensor.resize(total);
81             csl::memcpy(paramsTensor.get(), merged_params.data(), total, stream); /* synchronous copy */
82
83             const auto& variance_ = config.variance;
84             variance.assign(std::begin(variance_), std::end(variance_));
85
86             num_priors = config.num_priors;
87             stepX = config.stepX;
88             stepY = config.stepY;
89             clip = config.clip;
90             normalize = config.normalize;
91         }
92
93         void forward(
94             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
95             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
96             csl::Workspace& workspace) override
97         {
98             CV_Assert(inputs.size() == 2); /* we don't need the inputs but we are given */
99             CV_Assert(outputs.size() == 1);
100
101             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
102             auto output = output_wrapper->getSpan();
103
104             /* we had stored all the parameters in a single tensor; now we create appropriate views
105              * for each of the parameter arrays from the single tensor
106              */
107             auto boxWidths  = csl::View<float>(paramsTensor.get(), box_size);
108             auto boxHeights = csl::View<float>(paramsTensor.get() + box_size, box_size);
109             auto offsetsX   = csl::View<float>(paramsTensor.get() + 2 * box_size, offset_size);
110             auto offsetsY   = csl::View<float>(paramsTensor.get() + 2 * box_size + offset_size, offset_size);
111
112             kernels::generate_prior_boxes<T>(stream, output,
113                 boxWidths, boxHeights, offsetsX, offsetsY, stepX, stepY,
114                 variance, num_priors, feature_map_width, feature_map_height, image_width, image_height, normalize, clip);
115         }
116
117     private:
118         csl::Stream stream;
119         csl::Tensor<float> paramsTensor; /* widths, heights, offsetsX, offsetsY */
120
121         std::size_t feature_map_width, feature_map_height;
122         std::size_t image_width, image_height;
123
124         std::size_t box_size, offset_size;
125         float stepX, stepY;
126
127         std::vector<float> variance;
128
129         std::size_t num_priors;
130         bool clip, normalize;
131     };
132
133
134 }}} /* namespace cv::dnn::cuda4dnn */
135
136 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PRIOR_BOX_HPP */