1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP
8 #include "../../op_cuda.hpp"
10 #include "../csl/stream.hpp"
11 #include "../csl/tensor.hpp"
13 #include "../kernels/fill.hpp"
14 #include "../kernels/concat.hpp"
15 #include "../kernels/padding.hpp"
17 #include <opencv2/core.hpp>
24 namespace cv { namespace dnn { namespace cuda4dnn {
/* Padding modes dispatched on by PaddingOp::forward:
 * - CONSTANT: the output region outside the input is filled with a constant value
 * - REFLECTION101: the output border is mirrored from the input without repeating
 *   the edge element (OpenCV BORDER_REFLECT_101 style)
 */
enum class PaddingType {
    CONSTANT,
    REFLECTION101
};
32 class PaddingOp final : public CUDABackendNode {
34 using wrapper_type = GetCUDABackendWrapperType<T>;
36 /* `ranges` is indexed by axis and contains the range in the output where the input is copied to */
37 PaddingOp(csl::Stream stream_, PaddingType type_, T value_, std::vector<cv::Range> ranges)
38 : stream(std::move(stream_)), type{ type_ }, value{ value_ }, dstRanges(std::move(ranges))
43 const std::vector<cv::Ptr<BackendWrapper>>& inputs,
44 const std::vector<cv::Ptr<BackendWrapper>>& outputs,
45 csl::Workspace& workspace) override
47 CV_Assert(inputs.size() == 1 && outputs.size() == 1);
49 auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
50 auto input = input_wrapper->getView();
52 auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
53 auto output = output_wrapper->getSpan();
55 auto effective_rank = get_effective_rank(input);
56 CV_Assert(get_effective_rank(input) == get_effective_rank(output));
58 /* suppose we require padding for the first spatial axis (H in NCHW or D in NCDHW)
60 * there could be a case where the batch axis, channel axis, and the first spatial axis are all one
61 * this would result in effective rank being less than the number of axes requiring padding
63 effective_rank = std::max(effective_rank, dstRanges.size());
65 for (int i = effective_rank - dstRanges.size(); i < effective_rank; i++)
67 if (dstRanges[i] == Range::all())
68 CV_Assert(input.get_axis_size(i) == output.get_axis_size(i));
70 CV_Assert(input.get_axis_size(i) == dstRanges[i].size());
73 if (type == PaddingType::CONSTANT)
75 kernels::fill<T>(stream, output, value);
77 std::vector<std::size_t> offsets(effective_rank, 0);
78 for (int i = 0; i < dstRanges.size(); i++)
80 const auto delta = effective_rank - dstRanges.size();
81 if (dstRanges[i] != Range::all())
82 offsets[delta + i] = dstRanges[i].start;
85 kernels::concat_with_offsets<T>(stream, output, input, offsets);
87 else if (type == PaddingType::REFLECTION101)
89 std::vector<std::pair<std::size_t, std::size_t>> ranges(effective_rank);
90 for (int i = 0; i < effective_rank; i++)
92 const auto delta = effective_rank - dstRanges.size();
93 if (i < delta || dstRanges[i - delta] == Range::all())
94 ranges[i] = { 0, input.get_axis_size(i) };
96 ranges[i] = { dstRanges[i].start, dstRanges[i].end };
99 kernels::copy_with_reflection101<T>(stream, output, input, ranges);
108 std::vector<cv::Range> dstRanges;
111 }}} /* namespace cv::dnn::cuda4dnn */
113 #endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP */