modules/dnn/src/cuda4dnn/csl/cudnn/pooling.hpp

   1 // This file is part of OpenCV project.
   2 // It is subject to the license terms in the LICENSE file found in the top-level directory
   3 // of this distribution and at http://opencv.org/license.html.
   4
   5 #ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP
   6 #define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP
   7
   8 #include "cudnn.hpp"
   9
  10 #include "../pointer.hpp"
  11
  12 #include <opencv2/core.hpp>
  13
  14 #include <cudnn.h>
  15
  16 #include <cstddef>
  17 #include <array>
  18 #include <algorithm>
  19 #include <vector>
  20 #include <type_traits>
  21 #include <iterator>
  22
  23 namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
  24
  25     class PoolingDescriptor {
  26     public:
  27         enum class PoolingType {
  28             MAX,
  29             MAX_DETERMINISTIC,
  30             AVERAGE_EXCLUDE_PADDING,
  31             AVERAGE_INCLUDE_PADDING
  32         };
  33
  34         PoolingDescriptor() noexcept : descriptor{ nullptr } { }
  35         PoolingDescriptor(const PoolingDescriptor&) = delete;
  36         PoolingDescriptor(PoolingDescriptor&& other) noexcept
  37             : descriptor{ other.descriptor } {
  38             other.descriptor = nullptr;
  39         }
  40
  41         /** constructs a pooling descriptor
  42          *
  43          * Pre-conditions:
  44          * - \p window_size, \p padding and \p stride must have the same size
  45          *
  46          * The length of the containers is interpreted as the order of the pooling operation.
  47          *
  48          * Exception Guarantee: Basic
  49          */
  50         template <class SequenceContainer, typename = decltype(std::begin(std::declval<SequenceContainer>()))>
  51         PoolingDescriptor(
  52             const SequenceContainer& window_size,
  53             const SequenceContainer& padding,
  54             const SequenceContainer& stride,
  55             PoolingType type)
  56         {
  57             constructor(window_size, padding, stride, type);
  58         }
  59
  60         ~PoolingDescriptor() noexcept {
  61             if (descriptor != nullptr) {
  62                 /* cudnnDestroyPoolingDescriptor will not fail for a valid descriptor */
  63                 CUDA4DNN_CHECK_CUDNN(cudnnDestroyPoolingDescriptor(descriptor));
  64             }
  65         }
  66
  67         PoolingDescriptor& operator=(const PoolingDescriptor&) = delete;
  68         PoolingDescriptor& operator=(PoolingDescriptor&& other) noexcept {
  69             descriptor = other.descriptor;
  70             other.descriptor = nullptr;
  71             return *this;
  72         };
  73
  74         cudnnPoolingDescriptor_t get() const noexcept { return descriptor; }
  75
  76     private:
  77         template <class SequenceContainer>
  78         void constructor(
  79             const SequenceContainer& window_size,
  80             const SequenceContainer& padding,
  81             const SequenceContainer& stride,
  82             PoolingType type)
  83         {
  84             CV_Assert(window_size.size() == padding.size());
  85             CV_Assert(window_size.size() == stride.size());
  86
  87             auto get_pooling_type = [] (PoolingType type) {
  88                 switch (type) {
  89                 case PoolingType::MAX:
  90                     return CUDNN_POOLING_MAX;
  91                 case PoolingType::MAX_DETERMINISTIC:
  92                     return CUDNN_POOLING_MAX_DETERMINISTIC;
  93                 case PoolingType::AVERAGE_EXCLUDE_PADDING:
  94                     return CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
  95                 case PoolingType::AVERAGE_INCLUDE_PADDING:
  96                     return CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
  97                 }
  98                 CV_Error(Error::StsBadArg, "unknown pooling type");
  99             };
 100
 101             CUDA4DNN_CHECK_CUDNN(cudnnCreatePoolingDescriptor(&descriptor));
 102             try {
 103                 const auto rank = window_size.size();
 104                 if (rank == 2) {
 105                     CUDA4DNN_CHECK_CUDNN(
 106                         cudnnSetPooling2dDescriptor(
 107                             descriptor,
 108                             get_pooling_type(type), CUDNN_PROPAGATE_NAN,
 109                             window_size[0], window_size[1],
 110                             padding[0], padding[1],
 111                             stride[0], stride[1]
 112                         )
 113                     );
 114                 } else {
 115                     std::vector<int> iwindow_size(std::begin(window_size), std::end(window_size));
 116                     std::vector<int> ipadding(std::begin(padding), std::end(padding));
 117                     std::vector<int> istride(std::begin(stride), std::end(stride));
 118                     CUDA4DNN_CHECK_CUDNN(
 119                         cudnnSetPoolingNdDescriptor(
 120                             descriptor,
 121                             get_pooling_type(type), CUDNN_PROPAGATE_NAN,
 122                             rank, iwindow_size.data(), ipadding.data(), istride.data()
 123                         )
 124                     );
 125                 }
 126             } catch (...) {
 127                 /* cudnnDestroyPoolingDescriptor will not fail for a valid descriptor */
 128                 CUDA4DNN_CHECK_CUDNN(cudnnDestroyPoolingDescriptor(descriptor));
 129                 throw;
 130             }
 131         }
 132
 133         cudnnPoolingDescriptor_t descriptor;
 134     };
 135
 136     /** gives the shape of the output tensor after pooling
 137      *
 138      * @note it's not required to enforce the this shape in the output tensor; slightly different shapes will work
 139      *
 140      * Exception Guarantee: Basic
 141      */
 142     template <class T> inline
 143     void getPoolingForwardOutputDim(
 144         const PoolingDescriptor& poolingDesc,
 145         const TensorDescriptor<T>& inputDesc,
 146         std::vector<int>& output_dim)
 147     {
 148         output_dim.clear();
 149         output_dim.resize(CUDNN_DIM_MAX); /* we use `output_dim` to hold temporaries */
 150
 151         std::vector<int> temp(CUDNN_DIM_MAX);
 152         cudnnDataType_t tempDataType;
 153         CUDA4DNN_CHECK_CUDNN(
 154             cudnnGetTensorNdDescriptor(
 155                 inputDesc.get(),
 156                 CUDNN_DIM_MAX + 1, /* according to docs, this is what we do to get the rank */
 157                 &tempDataType,
 158                 output_dim.data(),
 159                 temp.data(),
 160                 temp.data()
 161             )
 162         );
 163
 164         const auto rank = output_dim[0];
 165         output_dim.resize(rank);
 166         CUDA4DNN_CHECK_CUDNN(
 167             cudnnGetPoolingNdForwardOutputDim(poolingDesc.get(), inputDesc.get(), rank, output_dim.data())
 168         );
 169     }
 170
 171     /** @brief performs pooling operation
 172      *
 173      * dstValue = alpha * result + beta * priorDstValue
 174      *
 175      * @tparam          T           pooling element type (must be `half` or `float`)
 176      *
 177      * @param           handle      valid cuDNN Handle
 178      * @param           poolingDesc pooling description
 179      * @param           inputDesc   tensor descriptor describing the input
 180      * @param[in]       inputPtr    pointer to input tensor in device memory
 181      * @param           alpha       result scale factor
 182      * @param           beta        previous value scale factor
 183      * @param           outputDesc  tensor descriptor describing the output
 184      * @param[out]      outputPtr   pointer to output tensor in device memory
 185      *
 186      * Exception Guarantee: Basic
 187      */
 188     template <class T>
 189     void pool(
 190         const Handle& handle,
 191         const PoolingDescriptor& poolingDesc,
 192         const TensorDescriptor<T>& inputDesc,
 193         const DevicePtr<const T> inputPtr,
 194         T alpha, T beta,
 195         const TensorDescriptor<T>& outputDesc,
 196         DevicePtr<T> outputPtr)
 197     {
 198         CV_Assert(handle);
 199
 200         CUDA4DNN_CHECK_CUDNN(
 201             cudnnPoolingForward(
 202                 handle.get(),
 203                 poolingDesc.get(),
 204                 &alpha, inputDesc.get(), inputPtr.get(),
 205                 &beta, outputDesc.get(), outputPtr.get()
 206             )
 207         );
 208     }
 209
 210     template <> inline
 211     void pool(
 212         const Handle& handle,
 213         const PoolingDescriptor& poolingDesc,
 214         const TensorDescriptor<half>& inputDesc,
 215         const DevicePtr<const half> inputPtr,
 216         half alpha, half beta,
 217         const TensorDescriptor<half>& outputDesc,
 218         DevicePtr<half> outputPtr)
 219     {
 220         CV_Assert(handle);
 221
 222         /* we specalize for fp16 as the scaling factors must be provided as `float` */
 223         float alpha_ = alpha, beta_ = beta;
 224         CUDA4DNN_CHECK_CUDNN(
 225             cudnnPoolingForward(
 226                 handle.get(),
 227                 poolingDesc.get(),
 228                 &alpha_, inputDesc.get(), inputPtr.get(),
 229                 &beta_, outputDesc.get(), outputPtr.get()
 230             )
 231         );
 232     }
 233
 234 }}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */
 235
 236 #endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP */