// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_TRANSPOSE_CONVOLUTION_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_TRANSPOSE_CONVOLUTION_HPP

#include "../../op_cuda.hpp"

#include "../csl/cudnn.hpp"
#include "../csl/stream.hpp"
#include "../csl/tensor.hpp"
#include "../csl/tensor_ops.hpp"

#include "../kernels/scale_shift.hpp"

#include <opencv2/core.hpp>

#include <cstddef>
#include <cstdint>
#include <vector>
#include <utility>
#include <algorithm>

namespace cv { namespace dnn { namespace cuda4dnn {

    struct TransposeConvolutionConfiguration {
        /* except for `input_shape` and `output_shape`, all configuration values must be provided
         * for the corresponding convolution operation (not the transpose convolution)
         */

        /* the size of each of the following vectors must be equal to the convolution order (i.e. `kernel_size.size()`) */
        std::vector<std::size_t> kernel_size;
        std::vector<std::size_t> dilations, strides;

        enum class PaddingMode {
            MANUAL, /* uses explicit padding values provided in `pads_begin` and `pads_end` */
            VALID, /* no padding is added */
            SAME /* TensorFlow logic is used for same padding */
        };

        /* explicit paddings are used if and only if `padMode` is set to MANUAL */
        PaddingMode padMode;
        std::vector<std::size_t> pads_begin, pads_end;

        /* full shape inclusive of the batch and channel axes */
        std::vector<std::size_t> input_shape;
        std::vector<std::size_t> output_shape;

        /* group count for grouped convolution */
        std::size_t groups;
    };
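
    /* illustrative example (added for clarity; not part of the original header): for a 2D transpose
     * convolution that maps an NCHW input of [1, 16, 10, 10] to an output of [1, 8, 20, 20] using a
     * 4x4 kernel with stride 2 and no dilation, the configuration describes the corresponding
     * convolution, which maps [1, 8, 20, 20] back to [1, 16, 10, 10]:
     *
     *   config.kernel_size  = {4, 4};
     *   config.strides      = {2, 2};
     *   config.dilations    = {1, 1};
     *   config.padMode      = TransposeConvolutionConfiguration::PaddingMode::SAME;
     *   config.groups       = 1;
     *   config.input_shape  = {1, 16, 10, 10};  // input to the transpose convolution
     *   config.output_shape = {1, 8, 20, 20};   // output of the transpose convolution
     */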

    template <class T>
    class TransposeConvolutionOp final : public CUDABackendNode {
    public:
        using wrapper_type = GetCUDABackendWrapperType<T>;

        TransposeConvolutionOp(csl::Stream stream_, csl::cudnn::Handle handle, const TransposeConvolutionConfiguration& config, const Mat& filters, const Mat& bias)
            : stream(std::move(stream_)), cudnnHandle(std::move(handle))
        {
            /* we make use of the backward pass of convolution to perform the forward pass of transpose convolution;
             * hence, we must set up the configuration for the corresponding convolution operation and perform its backward pass
             */
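            /* note (added for clarity): in this mapping the transpose convolution's input takes the place
             * of the convolution's output and its output takes the place of the convolution's input; the
             * GPU work itself is delegated to csl::TransposeConvolution further below
             */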
            const auto& kernel_size = config.kernel_size;
            const auto& dilations = config.dilations;
            const auto& strides = config.strides;

            const auto convolution_order = kernel_size.size();
            CV_Assert(convolution_order >= 1);

            CV_Assert(convolution_order == dilations.size());
            CV_Assert(convolution_order == strides.size());

            const auto& input_shape = config.input_shape;
            const auto& output_shape = config.output_shape;
            CV_Assert(input_shape.size() == output_shape.size());
            CV_Assert(input_shape.size() == convolution_order + 2);

            const auto groups = config.groups;

            if (convolution_order > 3)
                CV_Error(Error::StsNotImplemented, "Only 1D/2D/3D transpose convolution is supported.");

            const auto rank = input_shape.size();
            const auto input_feature_maps = input_shape[1];
            const auto output_feature_maps = output_shape[1];
            const auto output_feature_maps_per_group = output_feature_maps / groups;
            CV_Assert(output_feature_maps % groups == 0);

            filtersTensor = csl::makeTensorHeader<T>(filters);
            csl::copyMatToTensor<T>(filters, filtersTensor, stream);

            if (!bias.empty())
            {
                CV_Assert(bias.total() == output_feature_maps);
                biasTensor = csl::makeTensorHeader<T>(bias);
                csl::copyMatToTensor<T>(bias, biasTensor, stream);
            }

            /* `left` and `right` may be misleading as the padding is applicable for any number of dimensions,
             * but we use those identifiers to avoid confusion with `pads_begin` and `pads_end`
             *
             * `common_padding` contains the amount of padding that has to be added to both sides
             * `padding_left` and `padding_right` contain the amount of padding that needs to be added
             * to a particular side in addition to the common padding
             *
             * note that we compute the padding for the corresponding convolution operation
             */
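
            /* illustrative example (added for clarity): with `pads_begin = {1, 1}` and `pads_end = {2, 2}` in
             * the 2D case, each spatial axis decomposes as common_padding = min(1, 2) = 1, padding_left = 0
             * and padding_right = 1; such asymmetric leftovers are rejected further below unless the extra
             * right padding can be trimmed away without changing the output shape
             */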
            std::vector<std::size_t> common_padding(rank, 0);
            std::vector<std::size_t> padding_left(rank, 0), padding_right(rank, 0);
            if (config.padMode == TransposeConvolutionConfiguration::PaddingMode::MANUAL)
            {
                const auto& pads_begin = config.pads_begin;
                const auto& pads_end = config.pads_end;

                CV_Assert(convolution_order == pads_begin.size());
                CV_Assert(convolution_order == pads_end.size());

                for (int i = 2; i < common_padding.size(); i++)
                {
                    common_padding[i] = std::min(pads_begin[i - 2], pads_end[i - 2]);
                    padding_left[i] = pads_begin[i - 2] - common_padding[i];
                    padding_right[i] = pads_end[i - 2] - common_padding[i];
                }
            }
            else if (config.padMode == TransposeConvolutionConfiguration::PaddingMode::VALID)
            {
                /* nothing to do as the paddings are already preset to zero */
            }
            else if (config.padMode == TransposeConvolutionConfiguration::PaddingMode::SAME)
            {
                /* TensorFlow logic:
                 * total_padding[i] = (o[i] - 1) * s[i] + effective_k[i] - i[i]
                 *
                 * where `o` and `i` denote the convolution's output and input sizes, i.e. the transpose
                 * convolution's input and output sizes respectively
                 *
                 * if the total padding is odd, the extra padding is added towards the end
                 */
                for (int i = 2; i < rank; i++)
                {
                    const auto j = i - 2; /* filter index */
                    const auto effective_kernel_size = dilations[j] * (kernel_size[j] - 1) + 1;
                    const auto required_total_padding =
                        std::max<std::int64_t>(0, (input_shape[i] - 1) * strides[j] + effective_kernel_size - output_shape[i]);

                    common_padding[i] = required_total_padding / 2;
                    padding_left[i] = 0;
                    padding_right[i] = required_total_padding % 2;
                }
            }
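
            /* worked example (added for clarity): for a convolution input of 10 (the transpose convolution's
             * output), a convolution output of 10 (the transpose convolution's input), stride 1, kernel size 3
             * and dilation 1, the effective kernel size is 3 and the required total padding is
             * (10 - 1) * 1 + 3 - 10 = 2, giving common_padding = 1 with no extra right padding; the padding is
             * symmetric, as required by the check further below
             */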

            /* in some scenarios, the extra padding at the end may not change the output at all */
            for (int i = 2; i < rank; i++) {
                const auto j = i - 2; /* filter idx */
                const auto total_padding = common_padding[i] * 2 + padding_left[i] + padding_right[i];
                const auto effective_kernel_size = dilations[j] * (kernel_size[j] - 1) + 1;
                std::int64_t rem = (input_shape[i] + total_padding - effective_kernel_size) % strides[j];

                /* the output shape doesn't change if we decrease the total padding by at most `rem`
                 * provided that we decrease from the right
                 */
                if (rem && padding_right[i] > 0)
                    padding_right[i] = std::max<std::int64_t>(0, padding_right[i] - rem);
            }

            auto is_not_zero = [](std::size_t i) { return i != 0; };
            if (std::any_of(std::begin(padding_left), std::end(padding_left), is_not_zero) ||
                std::any_of(std::begin(padding_right), std::end(padding_right), is_not_zero))
            {
                CV_Error(Error::StsNotImplemented, "Padding configuration requires asymmetric padding and hence is not supported.");
            }

            typename csl::TransposeConvolution<T>::params_type params;
            params.input_shape.assign(std::begin(input_shape), std::end(input_shape));
            params.output_shape.assign(std::begin(output_shape), std::end(output_shape));

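            /* note (added for clarity): the filter is laid out as for the corresponding convolution, i.e.
             * [input_feature_maps (= conv output channels), output_feature_maps / groups (= conv input
             * channels per group), kernel dimensions...]
             */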
            auto& fshape = params.filter_shape;
            fshape.resize(rank);
            fshape[0] = input_feature_maps;
            fshape[1] = output_feature_maps_per_group;
            std::copy(std::begin(kernel_size), std::end(kernel_size), std::begin(fshape) + 2);
            CV_Assert(fshape.size() == kernel_size.size() + 2);

            params.padding.assign(std::begin(common_padding) + 2, std::end(common_padding));
            params.stride = strides;
            params.dilation = dilations;
            params.groups = config.groups;

            convoluter = csl::TransposeConvolution<T>(cudnnHandle, params);

            csl::WorkspaceBuilder builder;
            builder.require(convoluter.get_workspace_size());
            scratch_mem_in_bytes = builder.required_workspace_size();
        }

        void forward(
            const std::vector<cv::Ptr<BackendWrapper>>& inputs,
            const std::vector<cv::Ptr<BackendWrapper>>& outputs,
            csl::Workspace& workspace) override
        {
            CV_Assert(inputs.size() == 1 && outputs.size() == 1);

            auto input_wrapper = inputs[0].dynamicCast<wrapper_type>();
            auto input = input_wrapper->getView();

            auto output_wrapper = outputs[0].dynamicCast<wrapper_type>();
            auto output = output_wrapper->getSpan();

            csl::WorkspaceAllocator allocator(workspace);
            convoluter.transpose_convolve(output, input, filtersTensor, allocator.get_instance());
            if (!biasTensor.empty())
            {
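                /* note (added for clarity): the bias holds one value per output channel; `inner_size` is the
                 * number of elements per channel (product of the spatial dimensions), which the biasN kernel
                 * uses to broadcast the bias across each channel of the output
                 */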
                std::size_t inner_size = total(output_wrapper->getShape(), 2, -1);
                kernels::biasN<T>(stream, output, output, inner_size, biasTensor);
            }
        }

        std::size_t get_workspace_memory_in_bytes() const noexcept override { return scratch_mem_in_bytes; }

    private:
        csl::Stream stream;
        csl::cudnn::Handle cudnnHandle;
        csl::Tensor<T> filtersTensor, biasTensor;
        csl::TransposeConvolution<T> convoluter;

        std::size_t scratch_mem_in_bytes;
    };
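
    /* usage sketch (illustrative only; all caller-side names below are assumptions, not part of this
     * header): a layer's CUDA backend initialization would fill a TransposeConvolutionConfiguration
     * (see the example above the class) and construct the node along these lines:
     *
     *   cv::Ptr<BackendNode> node(
     *       new TransposeConvolutionOp<float>(std::move(stream), std::move(cudnn_handle), config, filtersMat, biasMat));
     *
     * where `stream` is a csl::Stream, `cudnn_handle` is a csl::cudnn::Handle and `filtersMat`/`biasMat`
     * are the layer's weight and bias matrices (`biasMat` may be empty when the layer has no bias)
     */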

}}} /* namespace cv::dnn::cuda4dnn */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_PRIMITIVES_TRANSPOSE_CONVOLUTION_HPP */