1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 #ifndef OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP
6 #define OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP
10 #include "../pointer.hpp"
12 #include <opencv2/core.hpp>
20 #include <type_traits>
23 namespace cv { namespace dnn { namespace cuda4dnn { namespace csl { namespace cudnn {
// RAII wrapper over a cuDNN pooling descriptor (cudnnPoolingDescriptor_t).
// Move-only ownership: copying is deleted; moving transfers the handle and
// leaves the source empty (nullptr) so it will not double-free.
class PoolingDescriptor {
    // Pooling modes exposed by the wrapper; mapped to the corresponding
    // cuDNN constants by the lambda inside constructor() below.
    enum class PoolingType {
        AVERAGE_EXCLUDE_PADDING,
        AVERAGE_INCLUDE_PADDING

    // Default-constructed object owns no descriptor; the destructor is then a no-op.
    PoolingDescriptor() noexcept : descriptor{ nullptr } { }
    PoolingDescriptor(const PoolingDescriptor&) = delete;
    // Move constructor: steal the handle and empty the source.
    PoolingDescriptor(PoolingDescriptor&& other) noexcept
        : descriptor{ other.descriptor } {
        other.descriptor = nullptr;

    /** constructs a pooling descriptor
     *
     * Pre-conditions:
     * - \p window_size, \p padding and \p stride must have the same size
     *
     * The length of the containers is interpreted as the order (rank) of the
     * pooling operation.
     *
     * Exception Guarantee: Basic
     */
    template <class SequenceContainer, typename = decltype(std::begin(std::declval<SequenceContainer>()))>
        const SequenceContainer& window_size,
        const SequenceContainer& padding,
        const SequenceContainer& stride,
        // Delegate the cuDNN calls to the private constructor() helper.
        constructor(window_size, padding, stride, type);

    // Releases the owned descriptor, if any.
    ~PoolingDescriptor() noexcept {
        if (descriptor != nullptr) {
            /* cudnnDestroyPoolingDescriptor will not fail for a valid descriptor */
            CUDA4DNN_CHECK_CUDNN(cudnnDestroyPoolingDescriptor(descriptor));

    PoolingDescriptor& operator=(const PoolingDescriptor&) = delete;
    // Move assignment: take over the source's handle and empty the source.
    // NOTE(review): the current `descriptor` is overwritten without being
    // destroyed first — this leaks if *this already owns a handle; presumably
    // callers only move-assign into empty descriptors. TODO confirm.
    PoolingDescriptor& operator=(PoolingDescriptor&& other) noexcept {
        descriptor = other.descriptor;
        other.descriptor = nullptr;

    // Returns the raw cuDNN handle (non-owning view); nullptr when empty.
    cudnnPoolingDescriptor_t get() const noexcept { return descriptor; }

    // Creates and configures the cuDNN descriptor; shared implementation used
    // by the public templated constructor above.
    template <class SequenceContainer>
        const SequenceContainer& window_size,
        const SequenceContainer& padding,
        const SequenceContainer& stride,
        // All three parameter containers must describe the same pooling order.
        CV_Assert(window_size.size() == padding.size());
        CV_Assert(window_size.size() == stride.size());

        // Translate the wrapper's PoolingType into the matching cuDNN enum.
        auto get_pooling_type = [] (PoolingType type) {
            case PoolingType::MAX:
                return CUDNN_POOLING_MAX;
            case PoolingType::MAX_DETERMINISTIC:
                return CUDNN_POOLING_MAX_DETERMINISTIC;
            case PoolingType::AVERAGE_EXCLUDE_PADDING:
                return CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
            case PoolingType::AVERAGE_INCLUDE_PADDING:
                return CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
        // Unreachable for valid PoolingType values; guards against corruption.
        CV_Error(Error::StsBadArg, "unknown pooling type");

        CUDA4DNN_CHECK_CUDNN(cudnnCreatePoolingDescriptor(&descriptor));
        // Rank (spatial order) of the pooling operation, taken from the
        // container length as documented on the public constructor.
        const auto rank = window_size.size();
            // 2D pooling uses the dedicated 2d setter with scalar arguments.
            CUDA4DNN_CHECK_CUDNN(
                cudnnSetPooling2dDescriptor(
                    get_pooling_type(type), CUDNN_PROPAGATE_NAN,
                    window_size[0], window_size[1],
                    padding[0], padding[1],
            // Generic N-dimensional path: cuDNN expects contiguous int arrays,
            // so copy the (possibly differently-typed) containers first.
            std::vector<int> iwindow_size(std::begin(window_size), std::end(window_size));
            std::vector<int> ipadding(std::begin(padding), std::end(padding));
            std::vector<int> istride(std::begin(stride), std::end(stride));
            CUDA4DNN_CHECK_CUDNN(
                cudnnSetPoolingNdDescriptor(
                    get_pooling_type(type), CUDNN_PROPAGATE_NAN,
                    rank, iwindow_size.data(), ipadding.data(), istride.data()
            // On failure, free the partially-configured descriptor before rethrowing
            // so the Basic exception guarantee holds (no leak).
            /* cudnnDestroyPoolingDescriptor will not fail for a valid descriptor */
            CUDA4DNN_CHECK_CUDNN(cudnnDestroyPoolingDescriptor(descriptor));

    // Owned cuDNN handle; nullptr when this object is empty (default-constructed
    // or moved-from).
    cudnnPoolingDescriptor_t descriptor;
/** gives the shape of the output tensor after pooling
 *
 * @note it's not required to enforce this shape in the output tensor; slightly different shapes will work
 *
 * @param poolingDesc pooling descriptor describing the operation
 * @param inputDesc   tensor descriptor describing the input
 * @param[out] output_dim filled with the expected output dimensions
 *
 * Exception Guarantee: Basic
 */
template <class T> inline
void getPoolingForwardOutputDim(
    const PoolingDescriptor& poolingDesc,
    const TensorDescriptor<T>& inputDesc,
    std::vector<int>& output_dim)
    output_dim.resize(CUDNN_DIM_MAX); /* we use `output_dim` to hold temporaries */

    std::vector<int> temp(CUDNN_DIM_MAX);
    cudnnDataType_t tempDataType;
    CUDA4DNN_CHECK_CUDNN(
        cudnnGetTensorNdDescriptor(
            // Asking for more dims than can exist makes cuDNN report the
            // actual rank instead (per the comment below).
            CUDNN_DIM_MAX + 1, /* according to docs, this is what we do to get the rank */

    // The first element now holds the input tensor's rank; shrink the vector
    // to its real size before querying the output dimensions.
    const auto rank = output_dim[0];
    output_dim.resize(rank);
    CUDA4DNN_CHECK_CUDNN(
        cudnnGetPoolingNdForwardOutputDim(poolingDesc.get(), inputDesc.get(), rank, output_dim.data())
/** @brief performs pooling operation
 *
 * dstValue = alpha * result + beta * priorDstValue
 *
 * @tparam T pooling element type (must be `half` or `float`)
 *
 * @param handle valid cuDNN Handle
 * @param poolingDesc pooling description
 * @param inputDesc tensor descriptor describing the input
 * @param[in] inputPtr pointer to input tensor in device memory
 * @param alpha result scale factor
 * @param beta previous value scale factor
 * @param outputDesc tensor descriptor describing the output
 * @param[out] outputPtr pointer to output tensor in device memory
 *
 * Exception Guarantee: Basic
 */
    const Handle& handle,
    const PoolingDescriptor& poolingDesc,
    const TensorDescriptor<T>& inputDesc,
    const DevicePtr<const T> inputPtr,
    const TensorDescriptor<T>& outputDesc,
    DevicePtr<T> outputPtr)
    CUDA4DNN_CHECK_CUDNN(
        // Scaling factors are passed by address, as the cuDNN API requires.
        &alpha, inputDesc.get(), inputPtr.get(),
        &beta, outputDesc.get(), outputPtr.get()
// fp16 overload of pool(): same contract as the generic version, but the
// half-precision scaling factors are converted to float before the call
// (see the comment inside the body).
    const Handle& handle,
    const PoolingDescriptor& poolingDesc,
    const TensorDescriptor<half>& inputDesc,
    const DevicePtr<const half> inputPtr,
    half alpha, half beta,
    const TensorDescriptor<half>& outputDesc,
    DevicePtr<half> outputPtr)
    /* we specialize for fp16 as the scaling factors must be provided as `float` */
    float alpha_ = alpha, beta_ = beta;
    CUDA4DNN_CHECK_CUDNN(
        // The float copies (alpha_/beta_) are passed, not the half originals.
        &alpha_, inputDesc.get(), inputPtr.get(),
        &beta_, outputDesc.get(), outputPtr.get()
234 }}}}} /* namespace cv::dnn::cuda4dnn::csl::cudnn */
236 #endif /* OPENCV_DNN_CUDA4DNN_CSL_CUDNN_POOLING_HPP */