// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP

#include "../../op_cuda.hpp"

#include "../csl/stream.hpp"
#include "../csl/tensor.hpp"

#include "../kernels/fill.hpp"
#include "../kernels/concat.hpp"
#include "../kernels/padding.hpp"

#include <opencv2/core.hpp>

#include <cstddef>
#include <vector>
#include <algorithm>
#include <utility>

namespace cv { namespace dnn { namespace cuda4dnn {

    enum class PaddingType {
        CONSTANT,
        REFLECTION101
    };

    template <class T>
    class PaddingOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        /* `ranges` is indexed by axis; for each axis, it specifies the range in the output to which the input is copied */
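        /* e.g. padding a [1, 3, 224, 224] NCHW input by one pixel on each side of H and W gives a
         * [1, 3, 226, 226] output; `ranges` would then be
         * { Range::all(), Range::all(), Range(1, 225), Range(1, 225) }
         */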
        PaddingOp(csl::Stream stream_, PaddingType type_, T value_, std::vector<cv::Range> ranges)
            : stream(std::move(stream_)), type{ type_ }, value{ value_ }, dstRanges(std::move(ranges))
        {
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            CV_Assert(inputs.size() == 1 && outputs.size() == 1);

            auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

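            /* the effective rank of a tensor is its rank ignoring leading axes of unit size */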
            auto effective_rank = get_effective_rank(input);
            CV_Assert(effective_rank == get_effective_rank(output));

            /* suppose we require padding for the first spatial axis (H in NCHW or D in NCDHW)
             *
             * there could be a case where the batch axis, channel axis, and the first spatial axis are all one
             * this would result in effective rank being less than the number of axes requiring padding
             */
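            /* e.g. an NCDHW input of shape [1, 1, 1, 56, 56] has an effective rank of two, while
             * `dstRanges` may specify a range for every axis, including the depth axis; raising
             * the effective rank to `dstRanges.size()` keeps the axis indexing below consistent
             */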
            effective_rank = std::max(effective_rank, dstRanges.size());

            /* `dstRanges` covers the last `dstRanges.size()` axes; `delta` maps an axis index to
             * the corresponding index in `dstRanges`
             */
            const auto delta = effective_rank - dstRanges.size();
            for (int i = delta; i < effective_rank; i++)
            {
                if (dstRanges[i - delta] == Range::all())
                    CV_Assert(input.get_axis_size(i) == output.get_axis_size(i));
                else
                    CV_Assert(input.get_axis_size(i) == dstRanges[i - delta].size());
            }

            if (type == PaddingType::CONSTANT)
            {
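                /* constant padding: fill the entire output with the padding value and then copy
                 * the input into its destination region; `concat_with_offsets` performs the
                 * offset copy here
                 */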
                kernels::fill<T>(stream, output, value);

                std::vector<std::size_t> offsets(effective_rank, 0);
                for (int i = 0; i < dstRanges.size(); i++)
                {
                    if (dstRanges[i] != Range::all())
                        offsets[delta + i] = dstRanges[i].start;
                }

                kernels::concat_with_offsets<T>(stream, output, input, offsets);
            }
            else if (type == PaddingType::REFLECTION101)
            {
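                /* reflection padding (as with cv::BORDER_REFLECT_101, the border sample is not
                 * repeated); `ranges[i]` gives, for each axis, the half-open interval in the
                 * output which holds the unreflected copy of the input
                 */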
                std::vector<std::pair<std::size_t, std::size_t>> ranges(effective_rank);
                for (int i = 0; i < effective_rank; i++)
                {
                    if (i < delta || dstRanges[i - delta] == Range::all())
                        ranges[i] = { 0, input.get_axis_size(i) };
                    else
                        ranges[i] = { dstRanges[i - delta].start, dstRanges[i - delta].end };
                }

                kernels::copy_with_reflection101<T>(stream, output, input, ranges);
            }
        }

    private:
        csl::Stream stream;
        PaddingType type;
        T value;

        std::vector<cv::Range> dstRanges;
    };
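
    /* A minimal usage sketch, illustrative only (in OpenCV, the padding layer constructs this
     * node internally): pad a [1, 3, 224, 224] NCHW tensor with zeros, one pixel on each side
     * of H and W; `stream` is assumed to be an existing csl::Stream.
     *
     *   std::vector<cv::Range> ranges = {
     *       cv::Range::all(), cv::Range::all(), cv::Range(1, 225), cv::Range(1, 225)
     *   };
     *   auto node = cv::makePtr<PaddingOp<float>>(stream, PaddingType::CONSTANT, 0.0f, ranges);
     */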

}}} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_PADDING_HPP */