// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_LRN_HPP
#define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_LRN_HPP

#include "cudnn.hpp"

#include "../pointer.hpp"
#include "../workspace.hpp"

#include <opencv2/core.hpp>

#include <cudnn.h>

#include <cstddef>

namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
    class LRNDescriptor {
    public:
        enum class LRNType {
            ACROSS_CHANNELS,
            WITHIN_CHANNEL
        };

        LRNDescriptor() noexcept : descriptor{ nullptr } { }
        LRNDescriptor(const LRNDescriptor&) = delete;
        LRNDescriptor(LRNDescriptor&& other) noexcept
            : descriptor{ other.descriptor }, type{ other.type } {
            other.descriptor = nullptr;
        }
        /** sets up a LRN descriptor
         *
         * @param local_size    size of the normalization window
         * @param alpha         variance scaling parameter
         * @param beta          power parameter
         * @param k             bias parameter
         *
         * @note \p alpha is divided by the window width in across channels mode
         * @note \p alpha is divided by the (window width)^spatialDimensions in within channel mode
         *
         * @note \p alpha, \p beta and \p k are cast to the tensor data type during the operation
         *
         * Exception Guarantee: Basic
         */
        LRNDescriptor(std::size_t local_size, double alpha, double beta, double k, LRNType type_) {
            constructor(local_size, alpha, beta, k, type_);
        }
        ~LRNDescriptor() noexcept {
            if (descriptor != nullptr) {
                /* cudnnDestroyLRNDescriptor will not fail for a valid descriptor */
                CUDA4DNN_CHECK_CUDNN(cudnnDestroyLRNDescriptor(descriptor));
            }
        }
        LRNDescriptor& operator=(const LRNDescriptor&) = delete;
        LRNDescriptor& operator=(LRNDescriptor&& other) noexcept {
            descriptor = other.descriptor;
            type = other.type;
            other.descriptor = nullptr;
            return *this;
        }

        cudnnLRNDescriptor_t get() const noexcept { return descriptor; }
        LRNType getType() const noexcept { return type; }
    private:
        void constructor(std::size_t local_size, double alpha, double beta, double k, LRNType type_) {
            CV_Assert(CUDNN_LRN_MIN_N <= local_size && local_size <= CUDNN_LRN_MAX_N);

            type = type_;

            CUDA4DNN_CHECK_CUDNN(cudnnCreateLRNDescriptor(&descriptor));
            try {
                CUDA4DNN_CHECK_CUDNN(
                    cudnnSetLRNDescriptor(
                        descriptor,
                        local_size,
                        alpha,
                        beta,
                        k
                    )
                );
            } catch (...) {
                /* cudnnDestroyLRNDescriptor will not fail for a valid descriptor */
                CUDA4DNN_CHECK_CUDNN(cudnnDestroyLRNDescriptor(descriptor));
                throw;
            }
        }

        cudnnLRNDescriptor_t descriptor;
        LRNType type;
    };
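    /* Usage sketch (illustrative only, not part of the original header): constructing a
     * descriptor for a cross-channel LRN with a window of 5; the hyper-parameter values
     * shown here are assumptions picked for the example, not defaults of this class.
     *
     *     LRNDescriptor lrnDesc(
     *         5,                                        // local_size
     *         1e-4,                                     // alpha
     *         0.75,                                     // beta
     *         2.0,                                      // k
     *         LRNDescriptor::LRNType::ACROSS_CHANNELS   // type
     *     );
     *
     * The descriptor is movable but not copyable; the wrapped cudnnLRNDescriptor_t is
     * destroyed automatically by the destructor.
     */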
    /** @brief performs local response normalization
     *
     * dstValue = alpha * result + beta * priorDstValue
     *
     * @tparam          T           element type (must be `half` or `float`)
     *
     * @param           handle      valid cuDNN Handle
     * @param           lrnDesc     LRN description
     * @param           inputDesc   tensor descriptor describing the input
     * @param[in]       inputPtr    pointer to input tensor in device memory
     * @param           alpha       result scale factor
     * @param           beta        previous value scale factor
     * @param           outputDesc  tensor descriptor describing the output
     * @param[out]      outputPtr   pointer to output tensor in device memory
     * @param           workspace   workspace memory large enough to hold the intermediate buffers required in within-channel mode
     *
     * Exception Guarantee: Basic
     */
    template <class T>
    void LRN(
        const Handle& handle,
        const LRNDescriptor& lrnDesc,
        const TensorDescriptor<T>& inputDesc,
        DevicePtr<const T> inputPtr,
        T alpha, T beta,
        const TensorDescriptor<T>& outputDesc,
        DevicePtr<T> outputPtr,
        WorkspaceInstance workspace)
    {
        if (lrnDesc.getType() == LRNDescriptor::LRNType::ACROSS_CHANNELS) {
            CUDA4DNN_CHECK_CUDNN(
                cudnnLRNCrossChannelForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_LRN_CROSS_CHANNEL_DIM1,
                    &alpha, inputDesc.get(), inputPtr.get(),
                    &beta, outputDesc.get(), outputPtr.get()
                )
            );
        } else if (lrnDesc.getType() == LRNDescriptor::LRNType::WITHIN_CHANNEL) {
            std::size_t size;
            CUDA4DNN_CHECK_CUDNN(cudnnGetTensorSizeInBytes(inputDesc.get(), &size));

            /* temporary buffers required by cudnnDivisiveNormalizationForward */
            DevicePtr<void> temp1 = workspace.get_span<half>(size).data();
            DevicePtr<void> temp2 = workspace.get_span<half>(size).data();

            CUDA4DNN_CHECK_CUDNN(
                cudnnDivisiveNormalizationForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_DIVNORM_PRECOMPUTED_MEANS,
                    &alpha, inputDesc.get(), inputPtr.get(),
                    NULL, /* means: not precomputed */
                    static_cast<void*>(temp1), static_cast<void*>(temp2),
                    &beta, outputDesc.get(), outputPtr.get()
                )
            );
        }
    }
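    /* Usage sketch (illustrative only): invoking the generic overload, assuming `handle`,
     * `lrnDesc`, the tensor descriptors, device pointers and a sufficiently large
     * `workspace` have been set up elsewhere.
     *
     *     csl::cudnn::LRN<float>(
     *         handle, lrnDesc,
     *         inputDesc, inputPtr,
     *         1.0f, 0.0f,              // alpha = 1, beta = 0: overwrite the output
     *         outputDesc, outputPtr,
     *         workspace
     *     );
     *
     * With alpha = 1 and beta = 0, the output is simply the LRN result; a non-zero beta
     * blends the result with the previous contents of the output tensor as described above.
     */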
    template <> inline
    void LRN(
        const Handle& handle,
        const LRNDescriptor& lrnDesc,
        const TensorDescriptor<half>& inputDesc,
        DevicePtr<const half> inputPtr,
        half alpha, half beta,
        const TensorDescriptor<half>& outputDesc,
        DevicePtr<half> outputPtr,
        WorkspaceInstance workspace)
    {
        /* we specialize for fp16 as the scaling factors must be provided as `float` */
        float alpha_ = alpha, beta_ = beta;
        if (lrnDesc.getType() == LRNDescriptor::LRNType::ACROSS_CHANNELS) {
            CUDA4DNN_CHECK_CUDNN(
                cudnnLRNCrossChannelForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_LRN_CROSS_CHANNEL_DIM1,
                    &alpha_, inputDesc.get(), inputPtr.get(),
                    &beta_, outputDesc.get(), outputPtr.get()
                )
            );
        } else if (lrnDesc.getType() == LRNDescriptor::LRNType::WITHIN_CHANNEL) {
            std::size_t size;
            CUDA4DNN_CHECK_CUDNN(cudnnGetTensorSizeInBytes(inputDesc.get(), &size));

            /* temporary buffers required by cudnnDivisiveNormalizationForward */
            DevicePtr<void> temp1 = workspace.get_span<half>(size).data();
            DevicePtr<void> temp2 = workspace.get_span<half>(size).data();

            CUDA4DNN_CHECK_CUDNN(
                cudnnDivisiveNormalizationForward(
                    handle.get(),
                    lrnDesc.get(), CUDNN_DIVNORM_PRECOMPUTED_MEANS,
                    &alpha_, inputDesc.get(), inputPtr.get(),
                    NULL, /* means: not precomputed */
                    static_cast<void*>(temp1), static_cast<void*>(temp2),
                    &beta_, outputDesc.get(), outputPtr.get()
                )
            );
        }
    }
}}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */

#endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_LRN_HPP */