Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / reorg.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REORG_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REORG_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../kernels/permute.hpp"
12
13 #include <opencv2/core.hpp>
14
15 #include <vector>
16 #include <utility>
17
18 namespace cv { namespace dnn { namespace cuda4dnn {
19
20     template <class T>
21     class ReorgOp final : public CUDABackendNode {
22     public:
23         using wrapper_type = GetCUDABackendWrapperType<T>;
24
25         ReorgOp(csl::Stream stream_, std::size_t stride_)
26             : stream(std::move(stream_)), stride{ stride_ } { }
27
28         void forward(
29             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
30             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
31             csl::Workspace& workspace) override
32         {
33             CV_Assert(inputs.size() == 1 && outputs.size() == 1);
34
35             auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
36             auto input = input_wrapper->getView();
37
38             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
39             auto output = output_wrapper->getSpan();
40
41             const std::size_t permute_input_shape[] = {
42                input.get_axis_size(0),
43                input.get_axis_size(1) * input.get_axis_size(2) / (stride * stride),
44                stride,
45                input.get_axis_size(3),
46                stride
47             };
48
49             constexpr std::size_t order[] = { 0, 2, 4, 1, 3 };
50
51             const std::size_t permute_output_shape[] = {
52                 permute_input_shape[order[0]],
53                 permute_input_shape[order[1]],
54                 permute_input_shape[order[2]],
55                 permute_input_shape[order[3]],
56                 permute_input_shape[order[4]]
57             };
58
59             input.unsqueeze();
60             input.reshape(std::begin(permute_input_shape), std::end(permute_input_shape));
61
62             output.unsqueeze();
63             output.reshape(std::begin(permute_output_shape), std::end(permute_output_shape));
64
65             kernels::permute(stream, output, input, { std::begin(order), std::end(order) });
66         }
67
68     private:
69         csl::Stream stream;
70         std::size_t stride;
71     };
72
73 }}} /* namespace cv::dnn::cuda4dnn */
74
75 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_REORG_HPP */