// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP

#include "../../op_cuda.hpp"

#include "../csl/stream.hpp"
#include "../csl/tensor.hpp"

#include "../kernels/fill.hpp"
#include "../kernels/concat.hpp"
#include "../kernels/padding.hpp"

#include <opencv2/core.hpp>

#include <cstddef>
#include <vector>
#include <algorithm>
#include <utility>

namespace cv { namespace dnn { namespace cuda4dnn {

    enum class PaddingType {
        CONSTANT,
        REFLECTION101
    };

    template <class T>
    class PaddingOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        /* `ranges` is indexed by axis; for each axis, it specifies the range in the output to which the input is copied */
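        /* e.g. padding a [1, 3, 224, 224] NCHW input by one pixel on each side of H and W gives a
         * [1, 3, 226, 226] output; `ranges` would then be
         * { Range::all(), Range::all(), Range(1, 225), Range(1, 225) }
         */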
        PaddingOp(csl::Stream stream_, PaddingType type_, T value_, std::vector<cv::Range> ranges)
            : stream(std::move(stream_)), type{ type_ }, value{ value_ }, dstRanges(std::move(ranges))
        {
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            CV_Assert(inputs.size() == 1 && outputs.size() == 1);

            auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

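            /* the effective rank of a tensor is its rank ignoring leading axes of unit size */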
            auto effective_rank = get_effective_rank(input);
            CV_Assert(effective_rank == get_effective_rank(output));

            /* suppose we require padding for the first spatial axis (H in NCHW or D in NCDHW)
             *
             * there could be a case where the batch axis, channel axis, and the first spatial axis are all one
             * this would result in effective rank being less than the number of axes requiring padding
             */
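            /* e.g. an NCDHW input of shape [1, 1, 1, 56, 56] has an effective rank of two, while
             * `dstRanges` may specify a range for every axis, including the depth axis; raising
             * the effective rank to `dstRanges.size()` keeps the axis indexing below consistent
             */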
            effective_rank = std::max(effective_rank, dstRanges.size());

            /* `dstRanges` covers the last `dstRanges.size()` axes; `delta` maps an axis index to
             * the corresponding index in `dstRanges`
             */
            const auto delta = effective_rank - dstRanges.size();
            for (int i = delta; i < effective_rank; i++)
            {
                if (dstRanges[i - delta] == Range::all())
                    CV_Assert(input.get_axis_size(i) == output.get_axis_size(i));
                else
                    CV_Assert(input.get_axis_size(i) == dstRanges[i - delta].size());
            }

            if (type == PaddingType::CONSTANT)
            {
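                /* constant padding: fill the entire output with the padding value and then copy
                 * the input into its destination region; `concat_with_offsets` performs the
                 * offset copy here
                 */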
                kernels::fill<T>(stream, output, value);

                std::vector<std::size_t> offsets(effective_rank, 0);
                for (int i = 0; i < dstRanges.size(); i++)
                {
                    if (dstRanges[i] != Range::all())
                        offsets[delta + i] = dstRanges[i].start;
                }

                kernels::concat_with_offsets<T>(stream, output, input, offsets);
            }
            else if (type == PaddingType::REFLECTION101)
            {
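                /* reflection padding (as with cv::BORDER_REFLECT_101, the border sample is not
                 * repeated); `ranges[i]` gives, for each axis, the half-open interval in the
                 * output which holds the unreflected copy of the input
                 */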
                std::vector<std::pair<std::size_t, std::size_t>> ranges(effective_rank);
                for (int i = 0; i < effective_rank; i++)
                {
                    if (i < delta || dstRanges[i - delta] == Range::all())
                        ranges[i] = { 0, input.get_axis_size(i) };
                    else
                        ranges[i] = { dstRanges[i - delta].start, dstRanges[i - delta].end };
                }

                kernels::copy_with_reflection101<T>(stream, output, input, ranges);
            }
        }

    private:
        csl::Stream stream;
        PaddingType type;
        T value;

        std::vector<cv::Range> dstRanges;
    };
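
    /* A minimal usage sketch, illustrative only (in OpenCV, the padding layer constructs this
     * node internally): pad a [1, 3, 224, 224] NCHW tensor with zeros, one pixel on each side
     * of H and W; `stream` is assumed to be an existing csl::Stream.
     *
     *   std::vector<cv::Range> ranges = {
     *       cv::Range::all(), cv::Range::all(), cv::Range(1, 225), cv::Range(1, 225)
     *   };
     *   auto node = cv::makePtr<PaddingOp<float>>(stream, PaddingType::CONSTANT, 0.0f, ranges);
     */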

}}} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP */