Merge pull request #14827 from YashasSamaga:cuda4dnn-csl-low
[platform/upstream/opencv.git] / modules / dnn / src / cuda4dnn / primitives / concat.hpp
1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CONCAT_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CONCAT_HPP
7
8 #include "../../op_cuda.hpp"
9
10 #include "../csl/stream.hpp"
11 #include "../csl/pointer.hpp"
12
13 #include "../kernels/fill.hpp"
14 #include "../kernels/concat.hpp"
15
16 #include <opencv2/core.hpp>
17
18 #include <cstddef>
19 #include <vector>
20 #include <utility>
21
22 namespace cv { namespace dnn { namespace cuda4dnn {
23
24     template <class T>
25     class ConcatOp final : public CUDABackendNode {
26     public:
27         using wrapper_type = GetCUDABackendWrapperType<T>;
28
29         ConcatOp(csl::Stream stream_, std::size_t concat_axis, bool zero_padding)
30             : stream(std::move(stream_)), concat_axis{ concat_axis }, zero_padding{ zero_padding }
31         {
32         }
33
34         void forward(
35             const std::vector<cv::Ptr<BackendWrapper>>& inputs,
36             const std::vector<cv::Ptr<BackendWrapper>>& outputs,
37             csl::Workspace& workspace) override
38         {
39             CV_Assert(outputs.size() == 1);
40
41             auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
42             auto output = output_wrapper->getSpan();
43
44             if(zero_padding)
45             {
46                 auto output_shape = output_wrapper->getShape();
47
48                 kernels::fill<T>(stream, output, 0.0);
49
50                 std::size_t output_concat_axis_offset = 0;
51                 for (int i = 0; i < inputs.size(); i++)
52                 {
53                     auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
54                     auto input = input_wrapper->getView();
55                     auto input_shape = input_wrapper->getShape();
56
57                     std::vector<std::size_t> offsets(input_shape.size());
58                     for (int j = 0; j < offsets.size(); j++)
59                         offsets[j] = (output_shape[j] - input_shape[j]) / 2;
60                     offsets[concat_axis] = output_concat_axis_offset;
61
62                     kernels::concat_with_offsets(stream, output, input, offsets);
63
64                     output_concat_axis_offset += input.get_axis_size(concat_axis);
65                 }
66             }
67             else
68             {
69                 std::size_t output_axis_offset = 0;
70                 for (int i = 0; i < inputs.size(); i++)
71                 {
72                     auto input_wrapper = inputs[i].dynamicCast<wrapper_type>();
73                     auto input = input_wrapper->getView();
74
75                     kernels::concat(stream, output, output_axis_offset, input, concat_axis);
76
77                     output_axis_offset += input.get_axis_size(concat_axis);
78                 }
79             }
80         }
81
82     private:
83         csl::Stream stream;
84         std::size_t concat_axis;
85         bool zero_padding;
86     };
87
88 }}} /* namespace cv::dnn::cuda4dnn */
89
90 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_CONCAT_HPP */